PyPI - synth-ai - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl - Mend

synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (153)

synth_ai/__init__.py +13 -13
synth_ai/cli/__init__.py +6 -15
synth_ai/cli/commands/eval/__init__.py +6 -15
synth_ai/cli/commands/eval/config.py +338 -0
synth_ai/cli/commands/eval/core.py +236 -1091
synth_ai/cli/commands/eval/runner.py +704 -0
synth_ai/cli/commands/eval/validation.py +44 -117
synth_ai/cli/commands/filter/core.py +7 -7
synth_ai/cli/commands/filter/validation.py +2 -2
synth_ai/cli/commands/smoke/core.py +7 -17
synth_ai/cli/commands/status/__init__.py +1 -64
synth_ai/cli/commands/status/client.py +50 -151
synth_ai/cli/commands/status/config.py +3 -83
synth_ai/cli/commands/status/errors.py +4 -13
synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
synth_ai/cli/commands/status/subcommands/config.py +13 -0
synth_ai/cli/commands/status/subcommands/files.py +18 -63
synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
synth_ai/cli/commands/status/subcommands/models.py +18 -62
synth_ai/cli/commands/status/subcommands/runs.py +16 -63
synth_ai/cli/commands/status/subcommands/session.py +67 -172
synth_ai/cli/commands/status/subcommands/summary.py +24 -32
synth_ai/cli/commands/status/subcommands/utils.py +41 -0
synth_ai/cli/commands/status/utils.py +16 -107
synth_ai/cli/commands/train/__init__.py +18 -20
synth_ai/cli/commands/train/errors.py +3 -3
synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
synth_ai/cli/commands/train/validation.py +7 -7
synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
synth_ai/cli/commands/train/verifier_validation.py +235 -0
synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
synth_ai/cli/demo_apps/math/config.toml +0 -1
synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
synth_ai/cli/lib/apps/task_app.py +12 -13
synth_ai/cli/lib/task_app_discovery.py +6 -6
synth_ai/cli/lib/train_cfgs.py +10 -10
synth_ai/cli/task_apps/__init__.py +11 -0
synth_ai/cli/task_apps/commands.py +7 -15
synth_ai/core/env.py +12 -1
synth_ai/core/errors.py +1 -2
synth_ai/core/integrations/cloudflare.py +209 -33
synth_ai/core/tracing_v3/abstractions.py +46 -0
synth_ai/data/__init__.py +3 -30
synth_ai/data/enums.py +1 -20
synth_ai/data/rewards.py +100 -3
synth_ai/products/graph_evolve/__init__.py +1 -2
synth_ai/products/graph_evolve/config.py +16 -16
synth_ai/products/graph_evolve/converters/__init__.py +3 -3
synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
synth_ai/products/graph_gepa/__init__.py +23 -0
synth_ai/products/graph_gepa/converters/__init__.py +19 -0
synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
synth_ai/sdk/__init__.py +45 -35
synth_ai/sdk/api/eval/__init__.py +33 -0
synth_ai/sdk/api/eval/job.py +732 -0
synth_ai/sdk/api/research_agent/__init__.py +276 -66
synth_ai/sdk/api/train/builders.py +181 -0
synth_ai/sdk/api/train/cli.py +41 -33
synth_ai/sdk/api/train/configs/__init__.py +6 -4
synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
synth_ai/sdk/api/train/configs/rl.py +264 -16
synth_ai/sdk/api/train/configs/sft.py +165 -1
synth_ai/sdk/api/train/graph_validators.py +12 -12
synth_ai/sdk/api/train/graphgen.py +169 -51
synth_ai/sdk/api/train/graphgen_models.py +95 -45
synth_ai/sdk/api/train/local_api.py +10 -0
synth_ai/sdk/api/train/pollers.py +36 -0
synth_ai/sdk/api/train/prompt_learning.py +390 -60
synth_ai/sdk/api/train/rl.py +41 -5
synth_ai/sdk/api/train/sft.py +2 -0
synth_ai/sdk/api/train/task_app.py +20 -0
synth_ai/sdk/api/train/validators.py +17 -17
synth_ai/sdk/graphs/completions.py +239 -33
synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
synth_ai/sdk/learning/__init__.py +35 -5
synth_ai/sdk/learning/context_learning_client.py +531 -0
synth_ai/sdk/learning/context_learning_types.py +294 -0
synth_ai/sdk/learning/prompt_learning_client.py +1 -1
synth_ai/sdk/learning/prompt_learning_types.py +2 -1
synth_ai/sdk/learning/rl/__init__.py +0 -4
synth_ai/sdk/learning/rl/contracts.py +0 -4
synth_ai/sdk/localapi/__init__.py +40 -0
synth_ai/sdk/localapi/apps/__init__.py +28 -0
synth_ai/sdk/localapi/client.py +10 -0
synth_ai/sdk/localapi/contracts.py +10 -0
synth_ai/sdk/localapi/helpers.py +519 -0
synth_ai/sdk/localapi/rollouts.py +93 -0
synth_ai/sdk/localapi/server.py +29 -0
synth_ai/sdk/localapi/template.py +49 -0
synth_ai/sdk/streaming/handlers.py +6 -6
synth_ai/sdk/streaming/streamer.py +10 -6
synth_ai/sdk/task/__init__.py +18 -5
synth_ai/sdk/task/apps/__init__.py +37 -1
synth_ai/sdk/task/client.py +9 -1
synth_ai/sdk/task/config.py +6 -11
synth_ai/sdk/task/contracts.py +137 -95
synth_ai/sdk/task/in_process.py +32 -22
synth_ai/sdk/task/in_process_runner.py +9 -4
synth_ai/sdk/task/rubrics/__init__.py +2 -3
synth_ai/sdk/task/rubrics/loaders.py +4 -4
synth_ai/sdk/task/rubrics/strict.py +3 -4
synth_ai/sdk/task/server.py +76 -16
synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
synth_ai/sdk/task/validators.py +34 -49
synth_ai/sdk/training/__init__.py +7 -16
synth_ai/sdk/tunnels/__init__.py +118 -0
synth_ai/sdk/tunnels/cleanup.py +83 -0
synth_ai/sdk/tunnels/ports.py +120 -0
synth_ai/sdk/tunnels/tunneled_api.py +363 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
synth_ai/cli/commands/baseline/__init__.py +0 -12
synth_ai/cli/commands/baseline/core.py +0 -636
synth_ai/cli/commands/baseline/list.py +0 -94
synth_ai/cli/commands/eval/errors.py +0 -81
synth_ai/cli/commands/status/formatters.py +0 -164
synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
synth_ai/cli/commands/status/subcommands/usage.py +0 -203
synth_ai/cli/commands/train/judge_validation.py +0 -305
synth_ai/cli/usage.py +0 -159
synth_ai/data/specs.py +0 -36
synth_ai/sdk/api/research_agent/cli.py +0 -428
synth_ai/sdk/api/research_agent/config.py +0 -357
synth_ai/sdk/api/research_agent/job.py +0 -717
synth_ai/sdk/baseline/__init__.py +0 -25
synth_ai/sdk/baseline/config.py +0 -209
synth_ai/sdk/baseline/discovery.py +0 -216
synth_ai/sdk/baseline/execution.py +0 -154
synth_ai/sdk/judging/__init__.py +0 -15
synth_ai/sdk/judging/base.py +0 -24
synth_ai/sdk/judging/client.py +0 -191
synth_ai/sdk/judging/types.py +0 -42
synth_ai/sdk/research_agent/__init__.py +0 -34
synth_ai/sdk/research_agent/container_builder.py +0 -328
synth_ai/sdk/research_agent/container_spec.py +0 -198
synth_ai/sdk/research_agent/defaults.py +0 -34
synth_ai/sdk/research_agent/results_collector.py +0 -69
synth_ai/sdk/specs/__init__.py +0 -46
synth_ai/sdk/specs/dataclasses.py +0 -149
synth_ai/sdk/specs/loader.py +0 -144
synth_ai/sdk/specs/serializer.py +0 -199
synth_ai/sdk/specs/validation.py +0 -250
synth_ai/sdk/tracing/__init__.py +0 -39
synth_ai/sdk/usage/__init__.py +0 -37
synth_ai/sdk/usage/client.py +0 -171
synth_ai/sdk/usage/models.py +0 -261
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0

synth_ai/cli/commands/status/utils.py CHANGED Viewed

@@ -1,114 +1,23 @@
-"""Shared utilities for status commands."""
+"""Utility helpers for status commands."""
 from __future__ import annotations
-import asyncio
-from collections.abc import Callable, Coroutine
-from datetime import UTC, datetime, timedelta
-from typing import Any, TypeVar
+from typing import Any
-import click
-from rich.console import Console
-from .config import DEFAULT_TIMEOUT, BackendConfig, resolve_backend_config
+def build_headers(api_key: str | None) -> dict[str, str]:
+    if not api_key:
+        return {}
+    return {"Authorization": f"Bearer {api_key}", "X-API-Key": api_key}
-T = TypeVar("T")
-console = Console()
-def parse_relative_time(value: str | None) -> str | None:
-    """Convert relative time expressions (e.g., '5m', '2h', '1d') to ISO strings."""
-    if not value:
-        return None
-    token = value.strip().lower()
-    if not token:
-        return None
-    multiplier = 1.0
-    if token.endswith("ms"):
-        multiplier = 0.001
-        token = token[:-2]
-    elif token.endswith("s"):
-        multiplier = 1.0
-        token = token[:-1]
-    elif token.endswith("m"):
-        multiplier = 60.0
-        token = token[:-1]
-    elif token.endswith("h"):
-        multiplier = 3600.0
-        token = token[:-1]
-    elif token.endswith("d"):
-        multiplier = 86400.0
-        token = token[:-1]
-    try:
-        seconds = float(token) * multiplier
-    except ValueError:
-        return value
-    dt = datetime.now(UTC) - timedelta(seconds=seconds)
-    return dt.isoformat()
-def ensure_async(fn: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
-    """Decorator to run an async callable via asyncio.run inside Click commands."""
-    def wrapper(*args, **kwargs):
-        return asyncio.run(fn(*args, **kwargs))
-    return wrapper
-def resolve_context_config(
-    ctx: click.Context,
-    *,
-    base_url: str | None,
-    api_key: str | None,
-    timeout: float | None,
-) -> BackendConfig:
-    if base_url is not None or api_key is not None or timeout not in (None, DEFAULT_TIMEOUT):
-        return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
-    obj = ctx.find_object(dict)
-    if obj and isinstance(obj.get("status_backend_config"), BackendConfig):
-        return obj["status_backend_config"]
-    return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
-def warn(message: str) -> None:
-    console.print(f"[yellow]{message}[/yellow]")
-def bail(message: str) -> None:
-    raise click.ClickException(message)
-def common_options() -> Callable[[Callable[..., T]], Callable[..., T]]:
-    """Apply shared backend CLI options to a command."""
-    def decorator(func: Callable[..., T]) -> Callable[..., T]:
-        options = [
-            click.option(
-                "--base-url",
-                envvar="SYNTH_STATUS_BASE_URL",
-                default=None,
-                help="Override the Synth backend base URL for this command.",
-            ),
-            click.option(
-                "--api-key",
-                envvar="SYNTH_STATUS_API_KEY",
-                default=None,
-                help="API key for the Synth backend.",
-            ),
-            click.option(
-                "--timeout",
-                default=DEFAULT_TIMEOUT,
-                show_default=True,
-                type=float,
-                help="HTTP request timeout in seconds.",
-            ),
-        ]
-        for option in reversed(options):
-            func = option(func)
-        return func
-    return decorator
+def ensure_status_ok(response) -> dict[str, Any]:
+    if response.status_code >= 400:
+        detail = ""
+        try:
+            payload = response.json()
+            detail = payload.get("detail", "")
+        except Exception:
+            detail = response.text
+        raise RuntimeError(detail or f"Request failed ({response.status_code})")
+    return response.json()

synth_ai/cli/commands/train/__init__.py CHANGED Viewed

@@ -1,22 +1,21 @@
 from .core import register, train_command
 from .errors import (
-    InvalidJudgeConfigError,
     InvalidRubricConfigError,
+    InvalidVerifierConfigError,
     TrainCliError,
 )
-from .judge_schemas import (
-    JudgeConfig,
-    JudgeOptionsConfig,
-    JudgeRequestPayload,
+from .verifier_schemas import (
     RubricConfig,
     RubricWeightsConfig,
-    build_judge_http_options,
+    VerifierConfig,
+    VerifierOptionsConfig,
+    VerifierRequestPayload,
+    build_verifier_http_options,
 )
-from .judge_validation import (
-    check_for_deprecated_fields,
-    extract_and_validate_judge_rubric,
-    validate_judge_config,
+from .verifier_validation import (
+    extract_and_validate_verifier_rubric,
     validate_rubric_config,
+    validate_verifier_config,
 )
 from .validation import (
     load_and_validate_rl,
@@ -31,23 +30,22 @@ __all__ = [
     "train_command",
     # Errors
     "TrainCliError",
-    "InvalidJudgeConfigError",
+    "InvalidVerifierConfigError",
     "InvalidRubricConfigError",
     # SFT/RL validation
     "validate_sft_config",
     "validate_rl_config",
     "load_and_validate_sft",
     "load_and_validate_rl",
-    # Judge/Rubric schemas
+    # Verifier/Rubric schemas
     "RubricWeightsConfig",
     "RubricConfig",
-    "JudgeOptionsConfig",
-    "JudgeConfig",
-    "JudgeRequestPayload",
-    "build_judge_http_options",
-    # Judge/Rubric validation
+    "VerifierOptionsConfig",
+    "VerifierConfig",
+    "VerifierRequestPayload",
+    "build_verifier_http_options",
+    # Verifier/Rubric validation
     "validate_rubric_config",
-    "validate_judge_config",
-    "extract_and_validate_judge_rubric",
-    "check_for_deprecated_fields",
+    "validate_verifier_config",
+    "extract_and_validate_verifier_rubric",
 ]

synth_ai/cli/commands/train/errors.py CHANGED Viewed

@@ -82,8 +82,8 @@ class InvalidTopologyError(TrainCliError):
 @dataclass(slots=True)
-class InvalidJudgeConfigError(TrainCliError):
-    """Raised when judge configuration validation fails."""
+class InvalidVerifierConfigError(TrainCliError):
+    """Raised when verifier configuration validation fails."""
     detail: str
     def __str__(self) -> str:
@@ -112,6 +112,6 @@ __all__ = [
     "UnsupportedAlgorithmError",
     "InvalidHyperparametersError",
     "InvalidTopologyError",
-    "InvalidJudgeConfigError",
+    "InvalidVerifierConfigError",
     "InvalidRubricConfigError",
 ]

synth_ai/cli/commands/train/prompt_learning_validation.py CHANGED Viewed

@@ -29,7 +29,7 @@ KNOWN_PROMPT_LEARNING_FIELDS = {
     "policy",
     "mipro",
     "gepa",
-    "judge",
+    "verifier",
     "proxy_models",
     "env_config",
     "env_name",
@@ -87,7 +87,7 @@ KNOWN_GEPA_FIELDS = {
     "population",
     "archive",
     "token",
-    "judge",
+    "verifier",
     "proxy_models",
     "adaptive_pool",
     "adaptive_batch",
@@ -204,7 +204,7 @@ KNOWN_MIPRO_FIELDS = {
     "demo",
     "grounding",
     "meta_update",
-    "judge",
+    "verifier",
     "proxy_models",
     "adaptive_pool",
     "spec_path",
@@ -219,16 +219,15 @@ KNOWN_MIPRO_FIELDS = {
     "min_bootstrap_demos",
 }
-# Known fields in [prompt_learning.judge]
-KNOWN_JUDGE_FIELDS = {
+# Known fields in [prompt_learning.verifier]
+KNOWN_VERIFIER_FIELDS = {
     "enabled",
     "reward_source",
     "backend_base",
     "backend_api_key_env",
     "backend_provider",
     "backend_model",
-    "synth_verifier_id",
-    "backend_rubric_id",
+    "verifier_graph_id",
     "backend_event_enabled",
     "backend_outcome_enabled",
     "backend_options",
@@ -300,7 +299,7 @@ DEPRECATED_FIELDS = {
     "max_concurrent_rollouts": "Use [prompt_learning.gepa.rollout].max_concurrent instead.",
     "evaluation_seeds": "Use [prompt_learning.gepa.evaluation].seeds instead of flat evaluation_seeds.",
     "validation_seeds": "Use [prompt_learning.gepa.evaluation].validation_seeds instead.",
-    "backend_rubric_id": "Use 'synth_verifier_id' instead of 'backend_rubric_id' in [prompt_learning.judge].",
+    "backend_rubric_id": "Use 'verifier_graph_id' in [prompt_learning.verifier].",
 }
@@ -444,10 +443,10 @@ def validate_prompt_learning_config(
             "termination_config is supported and will create backend TerminationManager conditions"
         )
-    # Validate [prompt_learning.judge] if present
-    judge = pl_config.get("judge")
-    if judge and isinstance(judge, dict):
-        _check_unknown_fields(judge, KNOWN_JUDGE_FIELDS, "prompt_learning.judge", result)
+    # Validate [prompt_learning.verifier] if present
+    verifier = pl_config.get("verifier")
+    if verifier and isinstance(verifier, dict):
+        _check_unknown_fields(verifier, KNOWN_VERIFIER_FIELDS, "prompt_learning.verifier", result)
     # Validate [prompt_learning.proxy_models] if present
     proxy_models = pl_config.get("proxy_models")
@@ -553,9 +552,9 @@ def _validate_gepa_config(
             result,
         )
-    if "judge" in gepa and isinstance(gepa["judge"], dict):
+    if "verifier" in gepa and isinstance(gepa["verifier"], dict):
         _check_unknown_fields(
-            gepa["judge"], KNOWN_JUDGE_FIELDS, "prompt_learning.gepa.judge", result
+            gepa["verifier"], KNOWN_VERIFIER_FIELDS, "prompt_learning.gepa.verifier", result
         )
@@ -575,9 +574,9 @@ def _validate_mipro_config(
     _check_unknown_fields(mipro, KNOWN_MIPRO_FIELDS, "prompt_learning.mipro", result)
     # Validate nested sections
-    if "judge" in mipro and isinstance(mipro["judge"], dict):
+    if "verifier" in mipro and isinstance(mipro["verifier"], dict):
         _check_unknown_fields(
-            mipro["judge"], KNOWN_JUDGE_FIELDS, "prompt_learning.mipro.judge", result
+            mipro["verifier"], KNOWN_VERIFIER_FIELDS, "prompt_learning.mipro.verifier", result
         )
     if "adaptive_pool" in mipro and isinstance(mipro["adaptive_pool"], dict):

synth_ai/cli/commands/train/validation.py CHANGED Viewed

@@ -12,10 +12,10 @@ from synth_ai.sdk.api.train.configs.sft import SFTConfig
 from synth_ai.sdk.api.train.utils import load_toml
 from .errors import (
-    InvalidJudgeConfigError,
     InvalidRLConfigError,
     InvalidRubricConfigError,
     InvalidSFTConfigError,
+    InvalidVerifierConfigError,
     MissingAlgorithmError,
     MissingComputeError,
     MissingDatasetError,
@@ -23,7 +23,7 @@ from .errors import (
     TomlParseError,
     UnsupportedAlgorithmError,
 )
-from .judge_validation import extract_and_validate_judge_rubric
+from .verifier_validation import extract_and_validate_verifier_rubric
 __all__ = [
     "validate_sft_config",
@@ -317,16 +317,16 @@ def validate_rl_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
     if "reference_placement" not in config["compute"]["topology"]:
         config["compute"]["topology"]["reference_placement"] = "none"
-    # Validate judge/rubric configuration with formalized Pydantic models
+    # Validate verifier/rubric configuration with formalized Pydantic models
     # This will emit deprecation warnings for dead fields and validate structure
     try:
-        rubric_config, judge_config = extract_and_validate_judge_rubric(config)
+        rubric_config, verifier_config = extract_and_validate_verifier_rubric(config)
         # Validation passed - configs are clean and ready for use
         # The validated Pydantic models can be used by training code if needed
-    except (InvalidJudgeConfigError, InvalidRubricConfigError) as exc:
+    except (InvalidVerifierConfigError, InvalidRubricConfigError) as exc:
         raise InvalidRLConfigError(
-            detail=f"Judge/Rubric validation failed: {exc.detail}",
-            hint="Check JUDGE_RUBRIC_CLEANUP_GUIDE.md for migration help."
+            detail=f"Verifier/Rubric validation failed: {exc.detail}",
+            hint="Check the verifier/rubric cleanup guide for migration help."
         ) from exc
     # Validate using Pydantic model

synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} RENAMED Viewed

@@ -1,7 +1,7 @@
 """
-Pydantic schemas for judge/rubric configuration.
+Pydantic schemas for verifier/rubric configuration.
-These models define the ACTUAL fields used by the backend judge service,
+These models define the ACTUAL fields used by the backend verifier service,
 with all dead code removed. This is the single source of truth for what
 gets sent in HTTP requests.
 """
@@ -17,9 +17,9 @@ from synth_ai.sdk.api.train.configs.shared import ExtraModel
 __all__ = [
     "RubricWeightsConfig",
     "RubricConfig",
-    "JudgeOptionsConfig",
-    "JudgeConfig",
-    "JudgeRequestPayload",
+    "VerifierOptionsConfig",
+    "VerifierConfig",
+    "VerifierRequestPayload",
 ]
@@ -27,8 +27,8 @@ class RubricWeightsConfig(ExtraModel):
     """
     Reward blending weights (client-side only, not sent to backend).
-    These weights control how env rewards, event judge scores, and outcome
-    judge scores are combined into a final reward signal for policy gradients.
+    These weights control how env rewards, event verifier scores, and outcome
+    verifier scores are combined into a final reward signal for policy gradients.
     Formula:
         total_reward = (env * env_return) + (event * sum(event_scores)) + (outcome * outcome_score)
@@ -40,12 +40,12 @@ class RubricWeightsConfig(ExtraModel):
     )
     event: float = Field(
         default=0.0,
-        description="Weight for per-event judge scores (step-level judging)",
+        description="Weight for per-event verifier scores (step-level verification)",
         ge=0.0,
     )
     outcome: float = Field(
         default=0.0,
-        description="Weight for outcome judge score (episode-level judging)",
+        description="Weight for outcome verifier score (episode-level verification)",
         ge=0.0,
     )
@@ -61,11 +61,11 @@ class RubricConfig(ExtraModel):
     """
     Top-level rubric configuration.
-    Controls whether rubric-based judging is enabled and how rewards are blended.
+    Controls whether rubric-based verification is enabled and how rewards are blended.
     """
     enabled: bool = Field(
         default=False,
-        description="Master switch for rubric-based judging",
+        description="Master switch for rubric-based verification",
     )
     weights: RubricWeightsConfig = Field(
         default_factory=RubricWeightsConfig,
@@ -73,16 +73,16 @@ class RubricConfig(ExtraModel):
     )
-class JudgeOptionsConfig(ExtraModel):
+class VerifierOptionsConfig(ExtraModel):
     """
-    Judge provider options (sent to backend in HTTP request).
+    Verifier provider options (sent to backend in HTTP request).
-    These fields are sent in the "options" object of the judge score request.
-    All fields here map directly to the backend JudgeOptions schema.
+    These fields are sent in the "options" object of the verifier request.
+    All fields here map directly to the backend verifier options schema.
     """
     provider: str = Field(
         ...,
-        description="Judge provider type ('openai', 'groq', 'gemini')",
+        description="Verifier provider type ('openai', 'groq', 'gemini')",
         pattern=r"^(openai|groq|gemini)$",
     )
     model: str = Field(
@@ -96,11 +96,11 @@ class JudgeOptionsConfig(ExtraModel):
     )
     event: bool = Field(
         default=True,
-        description="Enable per-event (step-level) judging",
+        description="Enable per-event (step-level) verification",
     )
     outcome: bool = Field(
         default=True,
-        description="Enable outcome (episode-level) judging",
+        description="Enable outcome (episode-level) verification",
     )
     timeout_s: Optional[float] = Field(
         default=None,
@@ -120,38 +120,38 @@ class JudgeOptionsConfig(ExtraModel):
     )
     @model_validator(mode="after")
-    def _validate_at_least_one_enabled(self) -> JudgeOptionsConfig:
-        """Ensure at least one judging type is enabled."""
+    def _validate_at_least_one_enabled(self) -> VerifierOptionsConfig:
+        """Ensure at least one verification type is enabled."""
         if not self.event and not self.outcome:
             raise ValueError("At least one of 'event' or 'outcome' must be enabled")
         return self
-class JudgeConfig(ExtraModel):
+class VerifierConfig(ExtraModel):
     """
-    Top-level judge configuration.
+    Top-level verifier configuration.
-    This is parsed from TOML [judge] section and contains all judge-related settings.
+    This is parsed from TOML [verifier] section and contains all verifier-related settings.
     """
-    options: JudgeOptionsConfig = Field(
+    options: VerifierOptionsConfig = Field(
         ...,
-        description="Judge provider options (sent to backend)",
+        description="Verifier provider options (sent to backend)",
     )
 # HTTP Request Payload Structures (for documentation/type safety)
-class JudgeRequestPayload(ExtraModel):
+class VerifierRequestPayload(ExtraModel):
     """
-    HTTP request payload structure for POST /api/judge/v1/score.
+    HTTP request payload structure for POST /api/graphs/verifiers/completions.
-    This is the ACTUAL payload sent to the backend judge service.
+    This is the ACTUAL payload sent to the backend verifier service.
     Used for type safety and documentation only.
     """
     policy_name: str = Field(..., description="Name of the policy being evaluated")
     task_app: dict[str, Any] = Field(..., description="Task app metadata (id, base_url)")
     trace: dict[str, Any] = Field(..., description="Tracing v3 payload (event_history, metadata)")
-    options: dict[str, Any] = Field(..., description="Judge options (provider, model, etc.)")
+    options: dict[str, Any] = Field(..., description="Verifier options (provider, model, etc.)")
     class Config:
         extra = "allow"  # Backend might add extra fields
@@ -159,16 +159,16 @@ class JudgeRequestPayload(ExtraModel):
 # Helper to convert to backend request format
-def build_judge_http_options(
-    options_config: JudgeOptionsConfig,
+def build_verifier_http_options(
+    options_config: VerifierOptionsConfig,
     *,
     rubric_overrides_from_task_info: Optional[dict[str, Any]] = None,
 ) -> dict[str, Any]:
     """
-    Build the 'options' dict for HTTP request to backend judge.
+    Build the 'options' dict for HTTP request to backend verifier.
     Args:
-        options_config: Validated judge options from TOML
+        options_config: Validated verifier options from TOML
         rubric_overrides_from_task_info: Dynamic overrides fetched from TaskInfo (takes priority)
     Returns:
@@ -198,4 +198,3 @@ def build_judge_http_options(
         payload["rubric_overrides"] = options_config.rubric_overrides
     return payload

synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

Potentially problematic release.

synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl