synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/sdk/api/train/graphgen.py

@@ -1,13 +1,15 @@
- """First-class SDK API for GraphGen (Automated Design of Agentic Systems).
+ """First-class SDK API for GraphGen (Graph Opt).
+
+ **Status:** Alpha

  GraphGen is a simplified "Workflows API" for prompt optimization that:
  - Uses a simple JSON dataset format (GraphGenTaskSet) instead of TOML configs
  - Auto-generates task apps from the dataset (no user-managed task apps)
- - Has built-in judge configurations (rubric, contrastive, gold_examples)
+ - Has built-in verifier configurations (rubric, contrastive, gold_examples)
  - Wraps GEPA internally for the actual optimization

  Example CLI usage:
- uvx synth-ai train --type adas --dataset my_tasks.json --poll
+ uvx synth-ai train --type graphgen --dataset my_tasks.json --poll

  Example SDK usage:
  from synth_ai.sdk.api.train.graphgen import GraphGenJob
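
Before the detailed hunks below, a minimal end-to-end sketch of the renamed SDK surface, assuming the from_dataset/submit/poll_until_complete/download_prompt methods shown in this diff:

    from synth_ai.sdk.api.train.graphgen import GraphGenJob

    # Build a job from a JSON GraphGenTaskSet; GraphGen generates the task app itself
    job = GraphGenJob.from_dataset(
        dataset="my_tasks.json",
        graph_type="policy",
        policy_model="gpt-4o-mini",
        rollout_budget=100,
    )
    submit_result = job.submit()                     # GraphGenSubmitResult with graphgen_job_id
    final = job.poll_until_complete(progress=True)   # dict with "status", "best_score", ...
    if final.get("status") == "succeeded":
        prompt_text = job.download_prompt()          # optimized prompt as text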
@@ -46,14 +48,39 @@ from .graphgen_models import (
  load_graphgen_taskset,
  parse_graphgen_taskset,
  SessionTraceInput,
- GraphGenGraphJudgeResponse,
+ GraphGenGraphVerifierResponse,
  )
  from .utils import ensure_api_base, http_get, http_post


  @dataclass
  class GraphGenJobResult:
- """Result from an GraphGen job."""
+ """Result from a GraphGen job.
+
+ Contains the final status and results of a completed GraphGen workflow
+ optimization job, including the best score and snapshot ID for the
+ optimized graph.
+
+ Attributes:
+ graphgen_job_id: Unique identifier for the GraphGen job (e.g.,
+ "graphgen_abc123def456").
+ status: Current job status. One of: "pending", "running", "succeeded",
+ "failed", "cancelled".
+ best_score: Best evaluation score achieved during optimization. Higher
+ is better. None if job hasn't completed successfully.
+ best_snapshot_id: ID of the graph snapshot with the best score. Use this
+ to download or deploy the optimized graph.
+ error: Error message if the job failed, None otherwise.
+ dataset_name: Name of the dataset used for optimization.
+ task_count: Number of tasks in the dataset.
+ graph_evolve_job_id: ID of the underlying graph evolution job, if applicable.
+
+ Example:
+ >>> result = job.get_result()
+ >>> if result.status == "succeeded":
+ ... print(f"Best score: {result.best_score}")
+ ... print(f"Snapshot ID: {result.best_snapshot_id}")
+ """

  graphgen_job_id: str
  status: str
@@ -67,7 +94,29 @@ class GraphGenJobResult:

  @dataclass
  class GraphGenSubmitResult:
- """Result from submitting an GraphGen job."""
+ """Result from submitting a GraphGen job.
+
+ Returned immediately after job submission with initial job metadata
+ and configuration details.
+
+ Attributes:
+ graphgen_job_id: Unique identifier for the GraphGen job.
+ status: Initial job status (typically "pending" or "running").
+ dataset_name: Name of the dataset being used for optimization.
+ task_count: Number of tasks in the dataset.
+ rollout_budget: Total number of rollouts (evaluations) budgeted for
+ this optimization job.
+ policy_model: Name of the LLM model being used for the policy
+ (e.g., "gpt-4o-mini", "claude-3-5-sonnet").
+ verifier_mode: Evaluation mode being used. One of: "rubric", "contrastive",
+ "gold_examples", "verifier_graph".
+ graph_evolve_job_id: ID of the underlying graph evolution job, if applicable.
+
+ Example:
+ >>> submit_result = job.submit()
+ >>> print(f"Job {submit_result.graphgen_job_id} started")
+ >>> print(f"Optimizing {submit_result.task_count} tasks with {submit_result.rollout_budget} rollouts")
+ """

  graphgen_job_id: str
  status: str
@@ -75,20 +124,20 @@ class GraphGenSubmitResult:
  task_count: int
  rollout_budget: int
  policy_model: str
- judge_mode: str
+ verifier_mode: str
  graph_evolve_job_id: Optional[str] = None


  class GraphGenJob:
  """High-level SDK class for running GraphGen workflow optimization jobs.

- GraphGen (Automated Design of Agentic Systems) provides a simplified API for
+ GraphGen (Graph Opt) provides a simplified API for
  graph/workflow optimization that doesn't require users to manage task apps.

  Key differences from PromptLearningJob:
  - Uses JSON dataset format (GraphGenTaskSet) instead of TOML configs
  - No task app management required - GraphGen builds it internally
- - Built-in judge modes (rubric, contrastive, gold_examples)
+ - Built-in verifier modes (rubric, contrastive, gold_examples)
  - Graph-first: trains multi-node workflows by default (Graph-GEPA)
  - Public graph downloads are redacted `.txt` exports only
  - Simpler configuration with sensible defaults
@@ -103,7 +152,7 @@ class GraphGenJob:
  ... rollout_budget=100,
  ... )
  >>>
- >>> # Train a verifier graph (judge)
+ >>> # Train a verifier graph
  >>> verifier_job = GraphGenJob.from_dataset(
  ... dataset="verifier_dataset.json",
  ... graph_type="verifier",
@@ -133,9 +182,9 @@
  >>> # Run inference with optimized prompt
  >>> output = job.run_inference({"question": "What is 2+2?"})
  >>>
- >>> # Run judge with optimized verifier graph
- >>> judgment = verifier_job.run_judge(trace_data)
- >>> print(f"Score: {judgment.score}, Reasoning: {judgment.reasoning}")
+ >>> # Run verifier with optimized verifier graph
+ >>> verification = verifier_job.run_verifier(trace_data)
+ >>> print(f"Outcome reward: {verification.outcome_reward}")
  """

  def __init__(
@@ -178,8 +227,8 @@
  policy_model: str = "gpt-4o-mini",
  rollout_budget: int = 100,
  proposer_effort: Literal["low", "medium", "high"] = "medium",
- judge_model: Optional[str] = None,
- judge_provider: Optional[str] = None,
+ verifier_model: Optional[str] = None,
+ verifier_provider: Optional[str] = None,
  population_size: int = 4,
  num_generations: Optional[int] = None,
  problem_spec: Optional[str] = None,
@@ -196,15 +245,15 @@
  dataset: Dataset as file path, dict, or GraphGenTaskSet object
  graph_type: Type of graph to train:
  - "policy": Maps inputs to outputs (default).
- - "verifier": Judges/scores traces (requires verifier-compliant dataset).
+ - "verifier": Verifies/scores traces (requires verifier-compliant dataset).
  - "rlm": Recursive Language Model - handles massive contexts via tool-based search
  and recursive LLM calls. Requires configured_tools parameter.
  policy_model: Model to use for policy inference
  rollout_budget: Total number of rollouts for optimization
  proposer_effort: Proposer effort level ("medium" or "high").
  "low" is not allowed as gpt-4.1-mini is too weak for graph generation.
- judge_model: Override judge model from dataset
- judge_provider: Override judge provider from dataset
+ verifier_model: Override verifier model from dataset
+ verifier_provider: Override verifier provider from dataset
  population_size: Population size for GEPA
  num_generations: Number of generations (auto-calculated if not specified)
  problem_spec: Detailed problem specification for the graph proposer.
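
The judge_* keyword arguments are gone from from_dataset; callers migrating from 0.4.1 pass the verifier_* names instead. A hedged sketch (the model and provider values are placeholders):

    verifier_job = GraphGenJob.from_dataset(
        dataset="verifier_dataset.json",
        graph_type="verifier",
        verifier_model="gpt-4o-mini",   # was judge_model in 0.4.1
        verifier_provider="openai",     # was judge_provider in 0.4.1
    )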
@@ -270,8 +319,8 @@
  policy_model=policy_model,
  rollout_budget=rollout_budget,
  proposer_effort=proposer_effort,
- judge_model=judge_model,
- judge_provider=judge_provider,
+ verifier_model=verifier_model,
+ verifier_provider=verifier_provider,
  population_size=population_size,
  num_generations=num_generations,
  problem_spec=problem_spec,
@@ -405,8 +454,8 @@
  "policy_provider": self.config.policy_provider,
  "rollout_budget": self.config.rollout_budget,
  "proposer_effort": self.config.proposer_effort,
- "judge_model": self.config.judge_model,
- "judge_provider": self.config.judge_provider,
+ "verifier_model": self.config.verifier_model,
+ "verifier_provider": self.config.verifier_provider,
  "problem_spec": self.config.problem_spec,
  "target_llm_calls": self.config.target_llm_calls,
  "configured_tools": self.config.configured_tools,
@@ -423,10 +472,10 @@
  payload.pop("feedback_sample_size", None)
  if payload.get("policy_provider") is None:
  payload.pop("policy_provider", None)
- if payload.get("judge_model") is None:
- payload.pop("judge_model", None)
- if payload.get("judge_provider") is None:
- payload.pop("judge_provider", None)
+ if payload.get("verifier_model") is None:
+ payload.pop("verifier_model", None)
+ if payload.get("verifier_provider") is None:
+ payload.pop("verifier_provider", None)
  if payload.get("problem_spec") is None:
  payload.pop("problem_spec", None)
  if payload.get("target_llm_calls") is None:
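
The get/pop pairs above strip optional fields that were left unset so they never reach the request body. The same effect can be written as a single dict comprehension; this is an illustration of the pattern, not code the package ships:

    optional_keys = {"policy_provider", "verifier_model", "verifier_provider",
                     "problem_spec", "target_llm_calls"}
    payload = {k: v for k, v in payload.items()
               if not (k in optional_keys and v is None)}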
@@ -458,7 +507,7 @@

  payload = self._build_payload()

- # Submit job - use /graphgen/jobs endpoint (legacy: /adas/jobs)
+ # Submit job - use /graphgen/jobs endpoint
  create_url = f"{self.backend_url}/graphgen/jobs"
  headers = {
  "X-API-Key": self.api_key,
@@ -501,7 +550,7 @@
  task_count=js.get("task_count", len(self.dataset.tasks)),
  rollout_budget=js.get("rollout_budget", self.config.rollout_budget),
  policy_model=js.get("policy_model", self.config.policy_model),
- judge_mode=js.get("judge_mode", self.dataset.judge_config.mode),
+ verifier_mode=js.get("verifier_mode", self.dataset.verifier_config.mode),
  graph_evolve_job_id=self._graph_evolve_job_id,
  )

@@ -703,7 +752,7 @@
  base_url=self.backend_url,
  api_key=self.api_key,
  job_id=self.job_id, # Only GraphGen job ID - backend resolves to GEPA internally
- endpoints=StreamEndpoints.adas(self.job_id),
+ endpoints=StreamEndpoints.graphgen(self.job_id),
  config=config,
  handlers=list(handlers),
  interval_seconds=interval,
@@ -715,6 +764,88 @@

  return final_status

+ def poll_until_complete(
+ self,
+ *,
+ timeout: float = 3600.0,
+ interval: float = 5.0,
+ progress: bool = False,
+ on_status: Optional[Callable[[Dict[str, Any]], None]] = None,
+ ) -> Dict[str, Any]:
+ """Poll job until it reaches a terminal state.
+
+ Similar to PromptLearningJob.poll_until_complete(), this method polls
+ the backend periodically instead of using SSE streaming. Useful for
+ notebooks and environments where SSE may not work reliably.
+
+ Args:
+ timeout: Maximum seconds to wait (default: 3600 = 1 hour)
+ interval: Seconds between poll attempts (default: 5)
+ progress: If True, print status updates during polling (useful for notebooks)
+ on_status: Optional callback called on each status update
+
+ Returns:
+ Final job status dictionary containing 'status', 'best_score', etc.
+
+ Raises:
+ RuntimeError: If job hasn't been submitted yet
+ TimeoutError: If timeout is exceeded
+
+ Example:
+ >>> result = job.poll_until_complete(progress=True)
+ [00:15] running | score: 0.72
+ [00:30] running | score: 0.78
+ [00:45] succeeded | score: 0.85
+ """
+ if not self.job_id:
+ raise RuntimeError("Job not yet submitted. Call submit() first.")
+
+ import time
+
+ start_time = time.time()
+ elapsed = 0.0
+ last_data: Dict[str, Any] = {}
+
+ while elapsed <= timeout:
+ try:
+ status_data = self.get_status()
+ last_data = dict(status_data) if isinstance(status_data, dict) else {}
+
+ status = last_data.get("status", "unknown")
+ best_score = last_data.get("best_score")
+
+ # Progress output
+ if progress:
+ mins, secs = divmod(int(elapsed), 60)
+ score_str = f"score: {best_score:.2f}" if best_score is not None else "score: --"
+ print(f"[{mins:02d}:{secs:02d}] {status} | {score_str}")
+
+ # Callback for custom handling
+ if on_status:
+ on_status(last_data)
+
+ # Check terminal state
+ if status in ("succeeded", "completed", "failed", "error", "cancelled"):
+ return last_data
+
+ # Sleep before next poll
+ time.sleep(interval)
+ elapsed = time.time() - start_time
+
+ except Exception as e:
+ # On error, continue polling (might be transient network issue)
+ import logging
+ logger = logging.getLogger(__name__)
+ logger.warning(f"Error polling job status: {e}")
+ time.sleep(interval)
+ elapsed = time.time() - start_time
+
+ # Timeout exceeded
+ raise TimeoutError(
+ f"Job {self.job_id} did not complete within {timeout}s timeout. "
+ f"Current status: {last_data.get('status', 'unknown')}"
+ )
+
  def download_prompt(self) -> str:
  """Download the optimized prompt from a completed job.

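
A usage sketch for the new polling path, assuming a job that has already been submitted; the on_status callback receives the raw status dict on every poll:

    def log_status(status: dict) -> None:
        # called once per poll with the backend's status payload
        print(status.get("status"), status.get("best_score"))

    try:
        final = job.poll_until_complete(
            timeout=1800.0,       # give up after 30 minutes
            interval=10.0,        # poll every 10 seconds
            on_status=log_status,
        )
    except TimeoutError:
        final = job.get_status()  # fall back to the last known state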
@@ -773,6 +904,7 @@
  model: Optional[str] = None,
  prompt_snapshot_id: Optional[str] = None,
  graph_snapshot_id: Optional[str] = None,
+ timeout: float = 120.0,
  ) -> Dict[str, Any]:
  """Run inference with the optimized graph/workflow.

@@ -783,6 +915,7 @@
  graph_snapshot_id: Specific GraphSnapshot to use (default: best).
  Preferred for graph-first jobs. If provided, it is sent as
  `prompt_snapshot_id` for backward-compatible backend routing.
+ timeout: Request timeout in seconds (default: 120.0 = 2 minutes for image generation tasks)

  Returns:
  Output dictionary containing 'output', 'usage', etc.
@@ -813,7 +946,8 @@
  if snapshot_id:
  payload["prompt_snapshot_id"] = snapshot_id

- resp = http_post(url, headers=headers, json_body=payload, timeout=60.0)
+ # Use longer timeout for image generation tasks (can take 2-3 minutes)
+ resp = http_post(url, headers=headers, json_body=payload, timeout=timeout)

  if resp.status_code != 200:
  raise RuntimeError(
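
With the new timeout parameter, slow graphs (the image-generation case the inline comment mentions) can raise the limit per call instead of relying on the old hard-coded 60 s. A hedged example; the task input keys are placeholders:

    output = job.run_inference(
        {"prompt": "a watercolor fox"},  # hypothetical task input
        timeout=180.0,                   # allow up to 3 minutes instead of the 120 s default
    )
    print(output.get("output"), output.get("usage"))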
@@ -848,11 +982,11 @@
  context: Optional[Dict[str, Any]] = None,
  prompt_snapshot_id: Optional[str] = None,
  graph_snapshot_id: Optional[str] = None,
- ) -> GraphGenGraphJudgeResponse:
+ ) -> GraphGenGraphVerifierResponse:
  """Run a verifier graph on an execution trace.

  This method is specifically for graphs trained with graph_type=\"verifier\".
- It accepts a V3 trace and returns structured rewards (score, reasoning, per-event rewards).
+ It accepts a V3 trace and returns structured rewards.

  Args:
  session_trace: V3 session trace to evaluate. Can be a dict or SessionTraceInput.
@@ -862,7 +996,7 @@
  Preferred for graph-first jobs.

  Returns:
- GraphGenGraphJudgeResponse containing structured rewards and reasoning.
+ GraphGenGraphVerifierResponse containing structured rewards.

  Raises:
  RuntimeError: If job hasn't been submitted or inference fails.
@@ -873,7 +1007,7 @@
  if prompt_snapshot_id and graph_snapshot_id:
  raise ValueError("Provide only one of prompt_snapshot_id or graph_snapshot_id.")

- url = f"{self.backend_url}/graphgen/graph/judge"
+ url = f"{self.backend_url}/graphgen/graph/verifier"
  headers = {
  "X-API-Key": self.api_key,
  "Content-Type": "application/json",
@@ -902,23 +1036,7 @@
  f"Verifier inference failed: {resp.status_code} - {resp.text[:500]}"
  )

- return GraphGenGraphJudgeResponse.model_validate(resp.json())
-
- def run_judge(
- self,
- session_trace: Dict[str, Any] | SessionTraceInput,
- *,
- context: Optional[Dict[str, Any]] = None,
- prompt_snapshot_id: Optional[str] = None,
- graph_snapshot_id: Optional[str] = None,
- ) -> GraphGenGraphJudgeResponse:
- """Deprecated: use run_verifier instead."""
- return self.run_verifier(
- session_trace=session_trace,
- context=context,
- prompt_snapshot_id=prompt_snapshot_id,
- graph_snapshot_id=graph_snapshot_id,
- )
+ return GraphGenGraphVerifierResponse.model_validate(resp.json())

  def get_graph_record(
  self,
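
Because the deprecated run_judge shim is removed outright rather than kept as a warning, 0.4.1 callers must rename at the call site. A migration sketch using the field names from the class docstring (trace_data stands in for a V3 session trace):

    # 0.4.1
    # judgment = verifier_job.run_judge(trace_data)
    # print(judgment.score, judgment.reasoning)

    # 0.4.4
    verification = verifier_job.run_verifier(trace_data)
    print(verification.outcome_reward)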