synth_ai-0.4.1-py3-none-any.whl → synth_ai-0.4.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
@@ -547,37 +547,37 @@ def validate_prompt_learning_config(config_data: dict[str, Any], config_path: Pa
     )
     errors.extend(lo_errors)
 
-    # Validate judge config (shared by GEPA and MIPRO)
-    judge_section = pl_section.get("judge") or {}
-    if judge_section:
-        if not isinstance(judge_section, dict):
-            errors.append(f"prompt_learning.judge must be a table/dict, got {type(judge_section).__name__}")
+    # Validate verifier config (shared by GEPA and MIPRO)
+    verifier_section = pl_section.get("verifier") or {}
+    if verifier_section:
+        if not isinstance(verifier_section, dict):
+            errors.append(f"prompt_learning.verifier must be a table/dict, got {type(verifier_section).__name__}")
         else:
-            reward_source = str(judge_section.get("reward_source", "task_app")).strip().lower()
-            enabled = bool(judge_section.get("enabled"))
-            if reward_source and reward_source not in {"task_app", "judge", "fused"}:
-                errors.append("prompt_learning.judge.reward_source must be 'task_app', 'judge', or 'fused'")
-            backend_base = str(judge_section.get("backend_base", "") or "").strip()
-            backend_provider = str(judge_section.get("backend_provider", "") or "").strip()
-            backend_model = str(judge_section.get("backend_model", "") or "").strip()
+            reward_source = str(verifier_section.get("reward_source", "task_app")).strip().lower()
+            enabled = bool(verifier_section.get("enabled"))
+            if reward_source and reward_source not in {"task_app", "verifier", "fused"}:
+                errors.append("prompt_learning.verifier.reward_source must be 'task_app', 'verifier', or 'fused'")
+            backend_base = str(verifier_section.get("backend_base", "") or "").strip()
+            backend_provider = str(verifier_section.get("backend_provider", "") or "").strip()
+            backend_model = str(verifier_section.get("backend_model", "") or "").strip()
             if enabled:
                 pass
             if reward_source == "fused":
-                weight_event = judge_section.get("weight_event", 0.0)
-                weight_outcome = judge_section.get("weight_outcome", 0.0)
+                weight_event = verifier_section.get("weight_event", 0.0)
+                weight_outcome = verifier_section.get("weight_outcome", 0.0)
                 try:
                     weight_event_f = float(weight_event)
                 except (TypeError, ValueError):
-                    errors.append("prompt_learning.judge.weight_event must be numeric")
+                    errors.append("prompt_learning.verifier.weight_event must be numeric")
                     weight_event_f = 0.0
                 try:
                     weight_outcome_f = float(weight_outcome)
                 except (TypeError, ValueError):
-                    errors.append("prompt_learning.judge.weight_outcome must be numeric")
+                    errors.append("prompt_learning.verifier.weight_outcome must be numeric")
                     weight_outcome_f = 0.0
                 if weight_event_f <= 0 and weight_outcome_f <= 0:
                     errors.append(
-                        "prompt_learning.judge.reward_source='fused' requires weight_event > 0 or weight_outcome > 0"
+                        "prompt_learning.verifier.reward_source='fused' requires weight_event > 0 or weight_outcome > 0"
                    )
 
     # Check for multi-stage/multi-module pipeline config
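In config terms, this hunk renames the `[prompt_learning.judge]` table to `[prompt_learning.verifier]` and the `reward_source` value `"judge"` to `"verifier"`. A minimal sketch of a `config_data` dict that should satisfy the checks above; the key names come from the hunk, while the validator's module path and the surrounding `prompt_learning` keys are not shown in this diff:

```python
# Hypothetical 0.4.4-style config fragment mirroring the validator above.
config_data = {
    "prompt_learning": {
        "verifier": {                      # was [prompt_learning.judge] in 0.4.1
            "enabled": True,
            "reward_source": "fused",      # "task_app", "verifier", or "fused"
            "weight_event": 0.3,           # fused mode requires at least one
            "weight_outcome": 0.7,         # of these weights to be > 0
            "backend_provider": "openai",  # optional backend overrides
            "backend_model": "gpt-4o-mini",
        },
    },
}
```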
synth_ai/sdk/graphs/completions.py
@@ -1,13 +1,27 @@
-"""Graph completions client for graph inference (policies, verifiers, RLM)."""
+"""Graph completions client for graph inference (policies, verifiers, RLM).
+
+**Status:** Alpha
+
+This module provides the client for running inference on trained graphs,
+including policy graphs, verifier graphs, and Reasoning Language Models (RLM).
+
+Provides both sync and async clients:
+- GraphCompletionsSyncClient: Synchronous client using httpx
+- GraphCompletionsAsyncClient: Asynchronous client using AsyncHttpClient
+- GraphCompletionsClient: Alias for GraphCompletionsAsyncClient (backward compat)
+"""
 
 from __future__ import annotations
 
 import json
+from dataclasses import dataclass
 from typing import Any, Literal, List, Mapping, Optional, TypedDict, Union
 
+import httpx
+
 from synth_ai.core.http import AsyncHttpClient, HTTPError
 from synth_ai.core.tracing_v3.serialization import normalize_for_json
-from synth_ai.sdk.judging.schemas import (
+from synth_ai.sdk.graphs.verifier_schemas import (
     CalibrationExampleInput,
     GoldExampleInput,
 )
@@ -20,7 +34,7 @@ class GraphTarget(TypedDict, total=False):
     job_id: str
     graph_name: str
     graphgen_job_id: str
-    verifier_type: str
+    verifier_shape: str
 
 
 class GraphInfo(TypedDict, total=False):
@@ -29,7 +43,7 @@ class GraphInfo(TypedDict, total=False):
     graph_id: str
     name: str
     version: int
-    kind: str  # "policy", "verifier", "judge"
+    kind: str  # "policy", "verifier"
     best_score: float | None
     job_id: str | None  # Source job that created this graph
     created_at: str
@@ -42,8 +56,194 @@ class ListGraphsResponse(TypedDict):
     total: int
 
 
-class GraphCompletionsClient:
-    """Client for /api/graphs/completions with flexible graph targeting."""
+@dataclass
+class GraphCompletionResponse:
+    """Response from graph completion endpoint."""
+
+    output: dict[str, Any]
+    """The graph output data."""
+
+    usage: dict[str, Any] | None = None
+    """Token usage statistics."""
+
+    cache_status: str | None = None
+    """Cache hit status: 'warm', 'cold', or None."""
+
+    latency_ms: float | None = None
+    """Request latency in milliseconds."""
+
+    raw: dict[str, Any] | None = None
+    """Raw response dict for accessing additional fields."""
+
+    @classmethod
+    def from_dict(cls, data: dict[str, Any]) -> "GraphCompletionResponse":
+        """Create from API response dict."""
+        return cls(
+            output=data.get("output", {}),
+            usage=data.get("usage"),
+            cache_status=data.get("cache_status"),
+            latency_ms=data.get("latency_ms"),
+            raw=data,
+        )
+
+
+class GraphCompletionsSyncClient:
+    """Synchronous client for graph completions using httpx.
+
+    Example:
+        ```python
+        client = GraphCompletionsSyncClient(base_url, api_key)
+
+        # Run inference on a GraphGen job
+        response = client.run(job_id="graphgen_xxx", input_data={"query": "hello"})
+        print(response.output)
+
+        # Just get the output
+        output = client.run_output(job_id="graphgen_xxx", input_data={"query": "hello"})
+        ```
+    """
+
+    def __init__(self, base_url: str, api_key: str, *, timeout: float = 60.0) -> None:
+        self._base = base_url.rstrip("/")
+        self._key = api_key
+        self._timeout = timeout
+
+    def _resolve_job_id(self, *, job_id: str | None, graph: GraphTarget | None) -> str:
+        if job_id:
+            return job_id
+        if not graph:
+            raise ValueError("graph_completions_missing_job_id")
+        if graph.get("job_id"):
+            return str(graph["job_id"])
+        kind = graph.get("kind")
+        if kind == "zero_shot":
+            verifier_shape = graph.get("verifier_shape") or graph.get("graph_name")
+            if not verifier_shape:
+                raise ValueError("graph_completions_missing_verifier_shape")
+            return str(verifier_shape)
+        if kind == "graphgen":
+            graphgen_job_id = graph.get("graphgen_job_id")
+            if not graphgen_job_id:
+                raise ValueError("graph_completions_missing_graphgen_job_id")
+            return str(graphgen_job_id)
+        graph_name = graph.get("graph_name")
+        if graph_name:
+            return str(graph_name)
+        raise ValueError("graph_completions_missing_graph_target")
+
+    def run(
+        self,
+        *,
+        input_data: Mapping[str, Any],
+        job_id: str | None = None,
+        graph: GraphTarget | None = None,
+        model: str | None = None,
+        prompt_snapshot_id: str | None = None,
+        timeout: float | None = None,
+    ) -> GraphCompletionResponse:
+        """Run graph completion and return typed response.
+
+        Args:
+            input_data: Input data for the graph
+            job_id: GraphGen job ID or graph name
+            graph: Alternative graph target specification
+            model: Optional model override
+            prompt_snapshot_id: Specific snapshot to use
+            timeout: Request timeout (overrides client default)
+
+        Returns:
+            GraphCompletionResponse with output, usage, cache_status, etc.
+        """
+        payload: dict[str, Any] = {
+            "job_id": self._resolve_job_id(job_id=job_id, graph=graph),
+            "input": normalize_for_json(dict(input_data)),
+        }
+        if model:
+            payload["model"] = model
+        if prompt_snapshot_id:
+            payload["prompt_snapshot_id"] = prompt_snapshot_id
+
+        url = f"{self._base}/api/graphs/completions"
+        headers = {"X-API-Key": self._key, "Content-Type": "application/json"}
+
+        with httpx.Client(timeout=timeout or self._timeout) as client:
+            resp = client.post(url, headers=headers, json=payload)
+
+        if resp.status_code == 400 or resp.status_code == 422:
+            raise ValueError(f"graph_completions_validation_error: {resp.text[:500]}")
+        if resp.status_code in (401, 403):
+            raise PermissionError(f"graph_completions_auth_error: {resp.text[:500]}")
+        if resp.status_code == 404:
+            raise FileNotFoundError(f"graph_completions_not_found: {resp.text[:500]}")
+        if resp.status_code == 429:
+            raise Exception("graph_completions_rate_limited")
+
+        resp.raise_for_status()
+        return GraphCompletionResponse.from_dict(resp.json())
+
+    def run_output(
+        self,
+        *,
+        input_data: Mapping[str, Any],
+        job_id: str | None = None,
+        graph: GraphTarget | None = None,
+        model: str | None = None,
+        prompt_snapshot_id: str | None = None,
+        timeout: float | None = None,
+    ) -> dict[str, Any]:
+        """Run graph completion and return just the output dict.
+
+        Convenience method that returns only the output field.
+        """
+        result = self.run(
+            input_data=input_data,
+            job_id=job_id,
+            graph=graph,
+            model=model,
+            prompt_snapshot_id=prompt_snapshot_id,
+            timeout=timeout,
+        )
+        return result.output
+
+    def complete(
+        self,
+        graph_id: str,
+        input_data: Mapping[str, Any],
+        *,
+        model: str | None = None,
+        timeout: float | None = None,
+    ) -> GraphCompletionResponse:
+        """Execute any graph with arbitrary input.
+
+        Args:
+            graph_id: Built-in graph name, GraphGen job_id, or snapshot UUID
+            input_data: Graph-specific input data
+            model: Optional model override
+            timeout: Request timeout
+
+        Returns:
+            GraphCompletionResponse
+        """
+        return self.run(
+            input_data=input_data,
+            job_id=graph_id,
+            model=model,
+            timeout=timeout,
+        )
+
+
+class GraphCompletionsAsyncClient:
+    """Asynchronous client for graph completions.
+
+    Example:
+        ```python
+        client = GraphCompletionsAsyncClient(base_url, api_key)
+
+        # Run inference on a GraphGen job
+        result = await client.run(job_id="graphgen_xxx", input_data={"query": "hello"})
+        print(result["output"])
+        ```
+    """
 
     def __init__(self, base_url: str, api_key: str, *, timeout: float = 60.0) -> None:
         self._base = base_url.rstrip("/")
@@ -63,7 +263,7 @@ class GraphCompletionsClient:
         (determined by API key).
 
         Args:
-            kind: Optional filter by graph kind ("policy", "verifier", "judge")
+            kind: Optional filter by graph kind ("policy", "verifier")
             limit: Maximum number of graphs to return (default: 50)
 
         Returns:
@@ -112,10 +312,10 @@ class GraphCompletionsClient:
             return str(graph["job_id"])
         kind = graph.get("kind")
         if kind == "zero_shot":
-            verifier_type = graph.get("verifier_type") or graph.get("graph_name")
-            if not verifier_type:
-                raise ValueError("graph_completions_missing_verifier_type")
-            return str(verifier_type)
+            verifier_shape = graph.get("verifier_shape") or graph.get("graph_name")
+            if not verifier_shape:
+                raise ValueError("graph_completions_missing_verifier_shape")
+            return str(verifier_shape)
         if kind == "graphgen":
             graphgen_job_id = graph.get("graphgen_job_id")
             if not graphgen_job_id:
@@ -228,7 +428,7 @@
         rubric: Mapping[str, Any],
         system_prompt: str | None = None,
         user_prompt: str | None = None,
-        verifier_type: str | None = None,
+        verifier_shape: str | None = None,
         options: Mapping[str, Any] | None = None,
         model: str | None = None,
     ) -> dict[str, Any]:
@@ -239,7 +439,7 @@
             rubric: Rubric with event/outcome criteria
             system_prompt: Optional custom system prompt
             user_prompt: Optional custom user prompt
-            verifier_type: "single", "mapreduce", or "rlm" (auto-detects if None)
+            verifier_shape: "single", "mapreduce", or "rlm" (auto-detects if None)
             options: Optional execution options (event, outcome, etc.)
             model: Optional model override
@@ -247,11 +447,11 @@
             Verification result with event_reviews, outcome_review, etc.
         """
         # Auto-select graph shape based on trace size
-        if verifier_type is None:
-            verifier_type = self._select_graph_shape(session_trace)
+        if verifier_shape is None:
+            verifier_shape = self._select_graph_shape(session_trace)
 
         # Use composable naming: zero_shot_verifier_{gold_output_format}_{graph_shape}
-        graph_id = f"zero_shot_verifier_rubric_{verifier_type}"
+        graph_id = f"zero_shot_verifier_rubric_{verifier_shape}"
 
         input_data: dict[str, Any] = {
             "session_trace": normalize_for_json(session_trace),
@@ -279,7 +479,7 @@
         expected_rubric: str | None = None,
         system_prompt: str | None = None,
         user_prompt: str | None = None,
-        verifier_type: str | None = None,
+        verifier_shape: str | None = None,
         options: Mapping[str, Any] | None = None,
         model: str | None = None,
     ) -> dict[str, Any]:
@@ -295,7 +495,7 @@
             expected_rubric: Optional rubric/ground truth for the trace being evaluated
             system_prompt: Optional custom system prompt
             user_prompt: Optional custom user prompt
-            verifier_type: "single", "mapreduce", or "rlm" (auto-detects if None)
+            verifier_shape: "single", "mapreduce", or "rlm" (auto-detects if None)
             options: Optional execution options
             model: Optional model override
@@ -317,10 +517,10 @@
                 f"and outcome_reward (float 0.0-1.0). event_rewards length must match trace events."
             ) from e
 
-        if verifier_type is None:
-            verifier_type = self._select_graph_shape(session_trace)
+        if verifier_shape is None:
+            verifier_shape = self._select_graph_shape(session_trace)
 
-        graph_id = f"zero_shot_verifier_fewshot_{verifier_type}"
+        graph_id = f"zero_shot_verifier_fewshot_{verifier_shape}"
 
         # Convert validated examples back to dict for serialization
         input_data: dict[str, Any] = {
@@ -354,7 +554,7 @@
         expected_rubric: str | None = None,
         system_prompt: str | None = None,
         user_prompt: str | None = None,
-        verifier_type: str | None = None,
+        verifier_shape: str | None = None,
         options: Mapping[str, Any] | None = None,
         model: str | None = None,
     ) -> dict[str, Any]:
@@ -374,7 +574,7 @@
             expected_rubric: Optional rubric/ground truth for this trace
             system_prompt: Optional custom system prompt
             user_prompt: Optional custom user prompt
-            verifier_type: "single", "mapreduce", or "rlm" (auto-detects if None)
+            verifier_shape: "single", "mapreduce", or "rlm" (auto-detects if None)
             options: Optional execution options
             model: Optional model override
@@ -408,10 +608,10 @@
                 f"candidate_reasoning must be a non-empty string, got {type(candidate_reasoning).__name__}"
             )
 
-        if verifier_type is None:
-            verifier_type = self._select_graph_shape(session_trace)
+        if verifier_shape is None:
+            verifier_shape = self._select_graph_shape(session_trace)
 
-        graph_id = f"zero_shot_verifier_contrastive_{verifier_type}"
+        graph_id = f"zero_shot_verifier_contrastive_{verifier_shape}"
 
         # Convert validated examples back to dict for serialization
         input_data: dict[str, Any] = {
@@ -441,7 +641,7 @@
         session_trace: Mapping[str, Any],
         system_prompt: str,
         user_prompt: str,
-        verifier_type: str | None = None,
+        verifier_shape: str | None = None,
         options: Mapping[str, Any] | None = None,
         model: str | None = None,
     ) -> dict[str, Any]:
@@ -451,19 +651,19 @@
             session_trace: V3 trace format
             system_prompt: Custom system prompt (required)
             user_prompt: Custom user prompt (required)
-            verifier_type: "single", "mapreduce", or "rlm" (auto-detects if None)
+            verifier_shape: "single", "mapreduce", or "rlm" (auto-detects if None)
             options: Optional execution options
             model: Optional model override
 
         Returns:
             Verification result
         """
-        if verifier_type is None:
-            verifier_type = self._select_graph_shape(session_trace)
+        if verifier_shape is None:
+            verifier_shape = self._select_graph_shape(session_trace)
 
         # For custom prompts, use rubric single graph but with custom prompts
         # The graph will use the prompts instead of rubric
-        graph_id = f"zero_shot_verifier_rubric_{verifier_type}"
+        graph_id = f"zero_shot_verifier_rubric_{verifier_shape}"
 
         input_data: dict[str, Any] = {
             "session_trace": normalize_for_json(session_trace),
@@ -529,7 +729,7 @@
         return result
 
 
-class VerifierClient(GraphCompletionsClient):
+class VerifierAsyncClient(GraphCompletionsAsyncClient):
    """Verifier graph client that builds standard verifier inputs."""

    async def evaluate(
@@ -554,7 +754,6 @@ class VerifierClient(GraphCompletionsClient):
        input_data: dict[str, Any] = {
            "policy_name": policy_name,
            "task_app": task_app_payload,
-            "session_trace": trace_payload,
            "trace": trace_payload,
            "options": dict(options or {}),
        }
@@ -568,3 +767,10 @@
            model=model,
            prompt_snapshot_id=prompt_snapshot_id,
        )
+
+
+GraphCompletionsClient = GraphCompletionsAsyncClient
+"""Alias for GraphCompletionsAsyncClient."""
+
+VerifierClient = VerifierAsyncClient
+"""Alias for VerifierAsyncClient."""
synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py}
@@ -2,21 +2,21 @@
 Verifier API Contract Schemas
 
 These schemas define the expected structure for requests and responses
-to the verifier scoring endpoint at POST /api/judge/v1/score. Zero-shot
-verifier graphs use the same response format via POST /api/graphs/completions.
+to the verifier scoring endpoint at POST /api/graphs/verifiers/completions.
+Zero-shot verifier graphs use the same response format via POST /api/graphs/completions.
 
 This is the canonical contract that the backend MUST conform to.
 """
 
 from __future__ import annotations
 
-from typing import Any, Literal, Optional
+from typing import Annotated, Any, Literal, Optional
 
 from pydantic import BaseModel, Field, model_validator
 
 
 class CriterionScorePayload(BaseModel):
-    """Per-criterion score returned by the judge."""
+    """Per-criterion score returned by the verifier."""
 
     score: float = Field(..., description="Numeric score for this criterion")
     reason: str = Field(default="", description="Explanation for the score")
@@ -35,11 +35,11 @@ class ReviewPayload(BaseModel):
     summary: Optional[str] = Field(None, description="Optional text summary")
 
 
-class JudgeScoreResponse(BaseModel):
+class VerifierScoreResponse(BaseModel):
     """
-    Response body for POST /api/judge/v1/score.
+    Response body for POST /api/graphs/verifiers/completions.
 
-    This is the canonical contract that judge backends MUST return and is
+    This is the canonical contract that verifier backends MUST return and is
     also used as the zero-shot verifier graph output.
     """
@@ -90,24 +90,24 @@ class JudgeScoreResponse(BaseModel):
 
 # Request schemas for completeness
 
-class JudgeTaskApp(BaseModel):
+class VerifierTaskApp(BaseModel):
     """Task application metadata."""
 
     id: str = Field(..., description="Task app identifier")
     base_url: Optional[str] = Field(None, description="Optional base URL for task app")
 
 
-class JudgeOptions(BaseModel):
-    """Judge provider and configuration options."""
+class VerifierOptions(BaseModel):
+    """Verifier provider and configuration options."""
 
-    provider: Optional[str] = Field(None, description="Judge provider (e.g., 'openai', 'groq')")
+    provider: Optional[str] = Field(None, description="Verifier provider (e.g., 'openai', 'groq')")
     model: Optional[str] = Field(None, description="Model identifier")
     rubric_id: Optional[str] = Field(None, description="Rubric identifier")
-    event: bool = Field(True, description="Enable event-level judging")
-    outcome: bool = Field(True, description="Enable outcome-level judging")
+    event: bool = Field(True, description="Enable event-level verification")
+    outcome: bool = Field(True, description="Enable outcome-level verification")
 
 
-class JudgeTracePayload(BaseModel):
+class VerifierTracePayload(BaseModel):
     """Trace payload containing trajectory context."""
 
     event_history: list[dict[str, Any]] = Field(..., description="List of events/steps")
@@ -118,13 +118,13 @@ class JudgeTracePayload(BaseModel):
     metadata: dict[str, Any] = Field(default_factory=dict, description="Trace metadata")
 
 
-class JudgeScoreRequest(BaseModel):
-    """Request body for POST /api/judge/v1/score."""
+class VerifierScoreRequest(BaseModel):
+    """Request body for POST /api/graphs/verifiers/completions."""
 
     policy_name: str = Field(..., description="Name of the policy being evaluated")
-    task_app: JudgeTaskApp = Field(..., description="Task application metadata")
-    trace: JudgeTracePayload = Field(..., description="Trajectory trace to evaluate")
-    options: JudgeOptions = Field(default_factory=lambda: JudgeOptions(), description="Judge options")
+    task_app: VerifierTaskApp = Field(..., description="Task application metadata")
+    trace: VerifierTracePayload = Field(..., description="Trajectory trace to evaluate")
+    options: VerifierOptions = Field(default_factory=lambda: VerifierOptions(), description="Verifier options")
     rubric: Optional[dict[str, Any]] = Field(None, description="Optional explicit rubric criteria")
 
 
@@ -139,11 +139,11 @@ class CalibrationExampleInput(BaseModel):
 
     session_trace: dict[str, Any] = Field(..., description="V3 SessionTrace format (validated separately)")
     event_rewards: list[Annotated[float, Field(ge=0.0, le=1.0)]] = Field(
-        ..., 
+        ...,
         description="List of rewards per event (0.0-1.0), must match number of events in trace"
     )
     outcome_reward: Annotated[float, Field(ge=0.0, le=1.0)] = Field(
-        ..., 
+        ...,
         description="Overall outcome reward (0.0-1.0)"
     )
     metadata: dict[str, Any] = Field(default_factory=dict, description="Optional metadata")
@@ -200,12 +200,12 @@ class GoldExampleInput(BaseModel):
 
     summary: str = Field(..., min_length=1, description="Summary of the trace being evaluated")
     gold_score: Annotated[float, Field(ge=0.0, le=1.0)] = Field(
-        ..., 
+        ...,
        description="Gold-standard score (0.0-1.0)"
    )
    gold_reasoning: str = Field(..., min_length=1, description="Gold-standard reasoning/explanation")
    session_trace: Optional[dict[str, Any]] = Field(
-        None, 
+        None,
        description="Optional full trace (for richer evaluation)"
    )
    metadata: dict[str, Any] = Field(default_factory=dict, description="Optional metadata")
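Since this file only renames identifiers and endpoint paths, migrating callers is mostly mechanical. A small sketch using the two request schemas whose field lists are fully visible above (the identifiers are real; the values are placeholders):

```python
from synth_ai.sdk.graphs.verifier_schemas import VerifierOptions, VerifierTaskApp

# Renamed from JudgeTaskApp / JudgeOptions in 0.4.1; field definitions unchanged.
task_app = VerifierTaskApp(id="task-app-1", base_url="http://localhost:8000")
options = VerifierOptions(provider="openai", model="gpt-4o-mini")

# Event- and outcome-level verification both default to enabled.
assert options.event and options.outcome

print(task_app.model_dump())  # {'id': 'task-app-1', 'base_url': 'http://localhost:8000'}
print(options.model_dump())
```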
synth_ai/sdk/learning/__init__.py
@@ -1,6 +1,23 @@
 from synth_ai.sdk.task import task_app_health, validate_task_app_url
 
 from .client import LearningClient
+from .context_learning_client import (
+    ContextLearningClient,
+    create_job as create_context_learning_job,
+    get_best_script as get_context_learning_best_script,
+    get_job_status as get_context_learning_status,
+    run_job as run_context_learning_job,
+)
+from .context_learning_types import (
+    AlgorithmConfig,
+    BestScriptResult,
+    ContextLearningEvent,
+    ContextLearningJobConfig,
+    ContextLearningJobStatus,
+    ContextLearningMetric,
+    ContextLearningResults,
+    EnvironmentConfig,
+)
 from .health import backend_health, balance_autumn_normalized, pricing_preflight
 from .jobs import JobHandle, JobsApiResolver
 from .prompt_learning_client import (
@@ -20,8 +37,6 @@ from .rl import (
     RolloutRequest,
     RolloutResponse,
     RolloutSafetyConfig,
-    RolloutStep,
-    RolloutTrajectory,
     encrypt_for_backend,
     mint_environment_api_key,
     setup_environment_api_key,
@@ -32,30 +47,45 @@ from .sse import stream_events as stream_job_events
 from .validators import validate_trainer_cfg_rl, validate_training_jsonl
 
 __all__ = [
+    # Learning clients
     "LearningClient",
     "RlClient",
     "RLJobConfig",
     "FtClient",
     "SFTJobConfig",
     "prepare_sft_job_payload",
+    # Prompt Learning
     "PromptLearningClient",
     "get_prompts",
     "get_prompt_text",
     "get_scoring_summary",
+    # Context Learning
+    "ContextLearningClient",
+    "ContextLearningJobConfig",
+    "ContextLearningJobStatus",
+    "ContextLearningEvent",
+    "ContextLearningMetric",
+    "ContextLearningResults",
+    "BestScriptResult",
+    "EnvironmentConfig",
+    "AlgorithmConfig",
+    "create_context_learning_job",
+    "get_context_learning_status",
+    "get_context_learning_best_script",
+    "run_context_learning_job",
+    # RL types
     "RolloutEnvSpec",
     "RolloutPolicySpec",
     "RolloutRecordConfig",
     "RolloutSafetyConfig",
     "RolloutRequest",
-    "RolloutStep",
-    "RolloutTrajectory",
     "RolloutMetrics",
     "RolloutResponse",
     "mint_environment_api_key",
     "encrypt_for_backend",
     "setup_environment_api_key",
     "MAX_ENVIRONMENT_API_KEY_BYTES",
-    # convenience re-export for typing
+    # Utilities
     "validate_training_jsonl",
     "validate_trainer_cfg_rl",
     "validate_task_app_url",