PyPI - synth-ai - Versions diffs - 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl - Mend

synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (153) hide show

synth_ai/__init__.py +13 -13
synth_ai/cli/__init__.py +6 -15
synth_ai/cli/commands/eval/__init__.py +6 -15
synth_ai/cli/commands/eval/config.py +338 -0
synth_ai/cli/commands/eval/core.py +236 -1091
synth_ai/cli/commands/eval/runner.py +704 -0
synth_ai/cli/commands/eval/validation.py +44 -117
synth_ai/cli/commands/filter/core.py +7 -7
synth_ai/cli/commands/filter/validation.py +2 -2
synth_ai/cli/commands/smoke/core.py +7 -17
synth_ai/cli/commands/status/__init__.py +1 -64
synth_ai/cli/commands/status/client.py +50 -151
synth_ai/cli/commands/status/config.py +3 -83
synth_ai/cli/commands/status/errors.py +4 -13
synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
synth_ai/cli/commands/status/subcommands/config.py +13 -0
synth_ai/cli/commands/status/subcommands/files.py +18 -63
synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
synth_ai/cli/commands/status/subcommands/models.py +18 -62
synth_ai/cli/commands/status/subcommands/runs.py +16 -63
synth_ai/cli/commands/status/subcommands/session.py +67 -172
synth_ai/cli/commands/status/subcommands/summary.py +24 -32
synth_ai/cli/commands/status/subcommands/utils.py +41 -0
synth_ai/cli/commands/status/utils.py +16 -107
synth_ai/cli/commands/train/__init__.py +18 -20
synth_ai/cli/commands/train/errors.py +3 -3
synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
synth_ai/cli/commands/train/validation.py +7 -7
synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
synth_ai/cli/commands/train/verifier_validation.py +235 -0
synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
synth_ai/cli/demo_apps/math/config.toml +0 -1
synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
synth_ai/cli/lib/apps/task_app.py +12 -13
synth_ai/cli/lib/task_app_discovery.py +6 -6
synth_ai/cli/lib/train_cfgs.py +10 -10
synth_ai/cli/task_apps/__init__.py +11 -0
synth_ai/cli/task_apps/commands.py +7 -15
synth_ai/core/env.py +12 -1
synth_ai/core/errors.py +1 -2
synth_ai/core/integrations/cloudflare.py +209 -33
synth_ai/core/tracing_v3/abstractions.py +46 -0
synth_ai/data/__init__.py +3 -30
synth_ai/data/enums.py +1 -20
synth_ai/data/rewards.py +100 -3
synth_ai/products/graph_evolve/__init__.py +1 -2
synth_ai/products/graph_evolve/config.py +16 -16
synth_ai/products/graph_evolve/converters/__init__.py +3 -3
synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
synth_ai/products/graph_gepa/__init__.py +23 -0
synth_ai/products/graph_gepa/converters/__init__.py +19 -0
synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
synth_ai/sdk/__init__.py +45 -35
synth_ai/sdk/api/eval/__init__.py +33 -0
synth_ai/sdk/api/eval/job.py +732 -0
synth_ai/sdk/api/research_agent/__init__.py +276 -66
synth_ai/sdk/api/train/builders.py +181 -0
synth_ai/sdk/api/train/cli.py +41 -33
synth_ai/sdk/api/train/configs/__init__.py +6 -4
synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
synth_ai/sdk/api/train/configs/rl.py +264 -16
synth_ai/sdk/api/train/configs/sft.py +165 -1
synth_ai/sdk/api/train/graph_validators.py +12 -12
synth_ai/sdk/api/train/graphgen.py +169 -51
synth_ai/sdk/api/train/graphgen_models.py +95 -45
synth_ai/sdk/api/train/local_api.py +10 -0
synth_ai/sdk/api/train/pollers.py +36 -0
synth_ai/sdk/api/train/prompt_learning.py +390 -60
synth_ai/sdk/api/train/rl.py +41 -5
synth_ai/sdk/api/train/sft.py +2 -0
synth_ai/sdk/api/train/task_app.py +20 -0
synth_ai/sdk/api/train/validators.py +17 -17
synth_ai/sdk/graphs/completions.py +239 -33
synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
synth_ai/sdk/learning/__init__.py +35 -5
synth_ai/sdk/learning/context_learning_client.py +531 -0
synth_ai/sdk/learning/context_learning_types.py +294 -0
synth_ai/sdk/learning/prompt_learning_client.py +1 -1
synth_ai/sdk/learning/prompt_learning_types.py +2 -1
synth_ai/sdk/learning/rl/__init__.py +0 -4
synth_ai/sdk/learning/rl/contracts.py +0 -4
synth_ai/sdk/localapi/__init__.py +40 -0
synth_ai/sdk/localapi/apps/__init__.py +28 -0
synth_ai/sdk/localapi/client.py +10 -0
synth_ai/sdk/localapi/contracts.py +10 -0
synth_ai/sdk/localapi/helpers.py +519 -0
synth_ai/sdk/localapi/rollouts.py +93 -0
synth_ai/sdk/localapi/server.py +29 -0
synth_ai/sdk/localapi/template.py +49 -0
synth_ai/sdk/streaming/handlers.py +6 -6
synth_ai/sdk/streaming/streamer.py +10 -6
synth_ai/sdk/task/__init__.py +18 -5
synth_ai/sdk/task/apps/__init__.py +37 -1
synth_ai/sdk/task/client.py +9 -1
synth_ai/sdk/task/config.py +6 -11
synth_ai/sdk/task/contracts.py +137 -95
synth_ai/sdk/task/in_process.py +32 -22
synth_ai/sdk/task/in_process_runner.py +9 -4
synth_ai/sdk/task/rubrics/__init__.py +2 -3
synth_ai/sdk/task/rubrics/loaders.py +4 -4
synth_ai/sdk/task/rubrics/strict.py +3 -4
synth_ai/sdk/task/server.py +76 -16
synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
synth_ai/sdk/task/validators.py +34 -49
synth_ai/sdk/training/__init__.py +7 -16
synth_ai/sdk/tunnels/__init__.py +118 -0
synth_ai/sdk/tunnels/cleanup.py +83 -0
synth_ai/sdk/tunnels/ports.py +120 -0
synth_ai/sdk/tunnels/tunneled_api.py +363 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
synth_ai/cli/commands/baseline/__init__.py +0 -12
synth_ai/cli/commands/baseline/core.py +0 -636
synth_ai/cli/commands/baseline/list.py +0 -94
synth_ai/cli/commands/eval/errors.py +0 -81
synth_ai/cli/commands/status/formatters.py +0 -164
synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
synth_ai/cli/commands/status/subcommands/usage.py +0 -203
synth_ai/cli/commands/train/judge_validation.py +0 -305
synth_ai/cli/usage.py +0 -159
synth_ai/data/specs.py +0 -36
synth_ai/sdk/api/research_agent/cli.py +0 -428
synth_ai/sdk/api/research_agent/config.py +0 -357
synth_ai/sdk/api/research_agent/job.py +0 -717
synth_ai/sdk/baseline/__init__.py +0 -25
synth_ai/sdk/baseline/config.py +0 -209
synth_ai/sdk/baseline/discovery.py +0 -216
synth_ai/sdk/baseline/execution.py +0 -154
synth_ai/sdk/judging/__init__.py +0 -15
synth_ai/sdk/judging/base.py +0 -24
synth_ai/sdk/judging/client.py +0 -191
synth_ai/sdk/judging/types.py +0 -42
synth_ai/sdk/research_agent/__init__.py +0 -34
synth_ai/sdk/research_agent/container_builder.py +0 -328
synth_ai/sdk/research_agent/container_spec.py +0 -198
synth_ai/sdk/research_agent/defaults.py +0 -34
synth_ai/sdk/research_agent/results_collector.py +0 -69
synth_ai/sdk/specs/__init__.py +0 -46
synth_ai/sdk/specs/dataclasses.py +0 -149
synth_ai/sdk/specs/loader.py +0 -144
synth_ai/sdk/specs/serializer.py +0 -199
synth_ai/sdk/specs/validation.py +0 -250
synth_ai/sdk/tracing/__init__.py +0 -39
synth_ai/sdk/usage/__init__.py +0 -37
synth_ai/sdk/usage/client.py +0 -171
synth_ai/sdk/usage/models.py +0 -261
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0

synth_ai/sdk/task/trace_correlation_helpers.py CHANGED Viewed

@@ -2,7 +2,7 @@
 This module provides utilities for task apps to:
 1. Extract trace_correlation_id from rollout requests
-2. Include trace_correlation_id in rollout responses (3 required locations)
+2. Include trace_correlation_id in rollout responses (top-level, metadata, trace)
 See monorepo/trace_creation_and_judgement.txt "Fatal Guards" section for requirements.
 """
@@ -101,6 +101,24 @@ def extract_trace_correlation_id(
     try:
         parsed = urlparse(inference_url)
+        # 1. Try path-based extraction first (OpenAI SDK compatible format):
+        #    /v1/{trial_id}/{correlation_id}/chat/completions
+        path_segments = [s for s in parsed.path.split("/") if s]
+        if len(path_segments) >= 2:
+            # Check if path ends with chat/completions
+            if path_segments[-2:] == ["chat", "completions"] and len(path_segments) >= 3:
+                # correlation_id is the segment before chat/completions
+                potential_cid = path_segments[-3]
+                # Verify it looks like a correlation ID (starts with trace_ or cid_)
+                if potential_cid.startswith("trace_") or potential_cid.startswith("cid_"):
+                    logger.info(
+                        "extract_trace_correlation_id: extracted from URL path=%s",
+                        potential_cid,
+                    )
+                    return potential_cid.strip()
+        # 2. Fall back to query param extraction (legacy format)
         query_params = parse_qs(parsed.query or "")
         # Try multiple possible query param names
         for param_name in ["cid", "trace_correlation_id", "trace"]:
@@ -193,11 +211,11 @@ def include_trace_correlation_id_in_response(
 ) -> dict[str, Any]:
     """
     Include trace_correlation_id in all required locations of rollout response.
-    Required locations (per Fatal Guards section):
+    Required locations (trace-only):
     1. Top-level response["trace_correlation_id"]
     2. response["pipeline_metadata"]["trace_correlation_id"]
-    3. Each trajectory["trace_correlation_id"]
+    3. response["trace"]["metadata"]["trace_correlation_id"] (and session_trace metadata if present)
     Args:
         response_data: RolloutResponse dict (from .model_dump())
@@ -238,32 +256,42 @@ def include_trace_correlation_id_in_response(
             trace_correlation_id
         )
-    # 3. Add to each trajectory (REQUIRED)
-    trajectories = response_data.get("trajectories", [])
-    if isinstance(trajectories, list):
-        for idx, traj in enumerate(trajectories):
-            if isinstance(traj, dict) and "trace_correlation_id" not in traj:
-                traj["trace_correlation_id"] = trace_correlation_id
-                logger.debug(
-                    "include_trace_correlation_id: added to trajectory[%d] run_id=%s cid=%s",
-                    idx,
-                    run_id,
-                    trace_correlation_id
-                )
+    # 3. Add to trace metadata (REQUIRED)
+    trace_block = response_data.get("trace")
+    if isinstance(trace_block, dict):
+        trace_meta = trace_block.get("metadata")
+        if not isinstance(trace_meta, dict):
+            trace_meta = {}
+            trace_block["metadata"] = trace_meta
+        if "trace_correlation_id" not in trace_meta:
+            trace_meta["trace_correlation_id"] = trace_correlation_id
+        corr_ids = trace_meta.get("correlation_ids")
+        if isinstance(corr_ids, dict):
+            corr_map = dict(corr_ids)
+        else:
+            corr_map = {}
+        corr_map.setdefault("trace_correlation_id", trace_correlation_id)
+        trace_meta["correlation_ids"] = corr_map
+        session_trace = trace_block.get("session_trace")
+        if isinstance(session_trace, dict):
+            session_meta = session_trace.get("metadata")
+            if not isinstance(session_meta, dict):
+                session_meta = {}
+                session_trace["metadata"] = session_meta
+            session_meta.setdefault("trace_correlation_id", trace_correlation_id)
     logger.info(
         "include_trace_correlation_id: completed run_id=%s cid=%s "
-        "added to %d locations (top-level, metadata, %d trajectories)",
+        "added to top-level, metadata, and trace",
         run_id,
         trace_correlation_id,
-        2 + len(trajectories),
-        len(trajectories)
     )
     return response_data
-def build_trajectory_trace(
+def build_trace_payload(
     messages: list[dict[str, Any]],
     response: dict[str, Any] | None = None,
     *,
@@ -272,11 +300,7 @@ def build_trajectory_trace(
     metadata: dict[str, Any] | None = None,
 ) -> dict[str, Any]:
     """
-    Build a trajectory-level trace with event_history for trace strict mode.
-    This creates the trace structure required by monorepo's trace_validation.py:
-    - trajectory.trace.event_history must be non-empty
-    - event_history contains LM call records for input/output extraction
+    Build a v3 trace payload with event_history for trace-only responses.
     Args:
         messages: The messages sent to the LLM (input)
@@ -286,160 +310,178 @@ def build_trajectory_trace(
         metadata: Optional additional metadata
     Returns:
-        A trace dict with event_history suitable for trajectory.trace
-    Example:
-        trace = build_trajectory_trace(
-            messages=[{"role": "user", "content": "Hello"}],
-            response={"choices": [{"message": {"content": "Hi!"}}]},
-            correlation_id="trace_abc123",
-        )
-        trajectory = RolloutTrajectory(..., trace=trace)
+        A trace dict with event_history suitable for RolloutResponse.trace
     """
     import uuid
     from datetime import datetime
-    # Build event_history with LM call record
     event_history: list[dict[str, Any]] = []
-    # Create an LM call event (the primary event type for input/output extraction)
-    lm_event: dict[str, Any] = {
+    llm_response: dict[str, Any] = {}
+    if isinstance(response, dict):
+        if "message" in response:
+            llm_response = dict(response)
+        elif "choices" in response and isinstance(response.get("choices"), list) and response["choices"]:
+            first_choice = response["choices"][0] if isinstance(response["choices"][0], dict) else {}
+            llm_response = {
+                "message": first_choice.get("message") if isinstance(first_choice, dict) else {},
+                "usage": response.get("usage", {}),
+                "finish_reason": first_choice.get("finish_reason") if isinstance(first_choice, dict) else None,
+            }
+        else:
+            llm_response = dict(response)
+    llm_event: dict[str, Any] = {
+        "type": "lm_call",
         "event_type": "lm_call",
         "timestamp": datetime.now(UTC).isoformat(),
-        "call_record": {
-            "messages": messages,
-            "response": response or {},
-        },
+        "llm_request": {"messages": messages},
+        "llm_response": llm_response,
     }
     # Add correlation ID if provided
     if correlation_id:
-        lm_event["correlation_id"] = correlation_id
+        llm_event["correlation_id"] = correlation_id
-    event_history.append(lm_event)
+    event_history.append(llm_event)
+    trace_metadata: dict[str, Any] = dict(metadata or {})
+    trace_metadata.setdefault("session_id", session_id or str(uuid.uuid4()))
+    if correlation_id:
+        trace_metadata.setdefault("trace_correlation_id", correlation_id)
+        corr_ids = trace_metadata.get("correlation_ids")
+        if isinstance(corr_ids, dict):
+            corr_map = dict(corr_ids)
+        else:
+            corr_map = {}
+        corr_map.setdefault("trace_correlation_id", correlation_id)
+        trace_metadata["correlation_ids"] = corr_map
     trace: dict[str, Any] = {
-        "session_id": session_id or str(uuid.uuid4()),
+        "schema_version": "3.0",
         "event_history": event_history,
-        "created_at": datetime.now(UTC).isoformat(),
+        "markov_blanket_message_history": [],
+        "metadata": trace_metadata,
     }
-    if correlation_id:
-        trace["correlation_id"] = correlation_id
-    if metadata:
-        trace["metadata"] = metadata
     logger.debug(
-        "build_trajectory_trace: created trace with %d events, session_id=%s, cid=%s",
+        "build_trace_payload: created trace with %d events, session_id=%s, cid=%s",
         len(event_history),
-        trace["session_id"],
+        trace_metadata.get("session_id"),
         correlation_id,
     )
     return trace
-def include_event_history_in_trajectories(
+def build_trajectory_trace(
+    messages: list[dict[str, Any]],
+    response: dict[str, Any] | None = None,
+    *,
+    correlation_id: str | None = None,
+    session_id: str | None = None,
+    metadata: dict[str, Any] | None = None,
+) -> dict[str, Any]:
+    """Backward-compatible alias for build_trace_payload."""
+    return build_trace_payload(
+        messages=messages,
+        response=response,
+        correlation_id=correlation_id,
+        session_id=session_id,
+        metadata=metadata,
+    )
+def include_event_history_in_response(
     response_data: dict[str, Any],
-    messages_by_trajectory: list[list[dict[str, Any]]] | None = None,
-    responses_by_trajectory: list[dict[str, Any]] | None = None,
+    messages: list[dict[str, Any]] | None = None,
+    response: dict[str, Any] | None = None,
     *,
     run_id: str,
     correlation_id: str | None = None,
 ) -> dict[str, Any]:
     """
-    Ensure all trajectories have trace.event_history for trace strict mode.
-    This satisfies monorepo's trace_validation.py requirement:
-    - validate_response_has_hydrated_trace() checks for event_history
+    Ensure response.trace includes a v3 event_history payload.
     Args:
         response_data: RolloutResponse dict (from .model_dump())
-        messages_by_trajectory: List of messages for each trajectory (for building event_history)
-        responses_by_trajectory: List of LLM responses for each trajectory
+        messages: Messages for the LLM call (for building event_history)
+        response: LLM response payload
         run_id: Rollout run_id for logging
         correlation_id: Trace correlation ID
     Returns:
-        Modified response_data with event_history in each trajectory.trace
+        Modified response_data with event_history in response.trace
     """
-    trajectories = response_data.get("trajectories", [])
-    if not isinstance(trajectories, list):
-        logger.warning(
-            "include_event_history_in_trajectories: trajectories is not a list for run_id=%s",
-            run_id,
-        )
-        return response_data
+    trace_block = response_data.get("trace")
+    if not isinstance(trace_block, dict):
+        trace_block = {}
+        response_data["trace"] = trace_block
-    for idx, traj in enumerate(trajectories):
-        if not isinstance(traj, dict):
-            continue
+    event_history = trace_block.get("event_history")
+    session_trace = trace_block.get("session_trace")
+    if not event_history and isinstance(session_trace, dict):
+        event_history = session_trace.get("event_history")
-        # Get existing trace or create new one
-        trace = traj.get("trace")
-        if not isinstance(trace, dict):
-            trace = {}
-            traj["trace"] = trace
+    if isinstance(event_history, list) and event_history:
+        return response_data
-        # Check if event_history already exists and is non-empty
-        event_history = trace.get("event_history")
-        if isinstance(event_history, list) and len(event_history) > 0:
-            logger.debug(
-                "include_event_history_in_trajectories: trajectory[%d] already has "
-                "%d events, skipping run_id=%s",
-                idx,
-                len(event_history),
-                run_id,
-            )
-            continue
+    new_trace = build_trace_payload(
+        messages=messages or [],
+        response=response,
+        correlation_id=correlation_id,
+        metadata={"run_id": run_id},
+    )
-        # Build event_history from provided messages/responses
-        messages = (
-            messages_by_trajectory[idx]
-            if messages_by_trajectory and idx < len(messages_by_trajectory)
-            else []
-        )
-        response = (
-            responses_by_trajectory[idx]
-            if responses_by_trajectory and idx < len(responses_by_trajectory)
-            else None
-        )
+    # Merge new trace payload into the existing trace block.
+    trace_meta = trace_block.get("metadata")
+    if isinstance(trace_meta, dict):
+        merged_meta = dict(new_trace.get("metadata", {}))
+        merged_meta.update(trace_meta)
+        trace_block["metadata"] = merged_meta
+    else:
+        trace_block["metadata"] = new_trace.get("metadata", {})
-        # If no messages provided, try to extract from trajectory steps
-        if not messages:
-            steps = traj.get("steps", [])
-            for step in steps:
-                if isinstance(step, dict):
-                    obs = step.get("obs", {})
-                    if isinstance(obs, dict):
-                        step_messages = obs.get("messages")
-                        if isinstance(step_messages, list):
-                            messages = step_messages
-                            break
-        # Build the trace with event_history
-        new_trace = build_trajectory_trace(
-            messages=messages,
-            response=response,
-            correlation_id=correlation_id or traj.get("trace_correlation_id"),
-            metadata={"run_id": run_id, "trajectory_index": idx},
-        )
+    trace_block.setdefault("schema_version", new_trace.get("schema_version"))
+    trace_block["event_history"] = new_trace.get("event_history", [])
+    trace_block.setdefault(
+        "markov_blanket_message_history",
+        new_trace.get("markov_blanket_message_history", []),
+    )
-        # Merge with existing trace (preserve existing fields)
-        trace.update(new_trace)
-        logger.info(
-            "include_event_history_in_trajectories: added event_history to "
-            "trajectory[%d] run_id=%s events=%d",
-            idx,
-            run_id,
-            len(trace.get("event_history", [])),
-        )
+    if isinstance(session_trace, dict) and "event_history" not in session_trace:
+        session_trace["event_history"] = trace_block["event_history"]
+    logger.info(
+        "include_event_history_in_response: added event_history run_id=%s events=%d",
+        run_id,
+        len(trace_block.get("event_history", [])),
+    )
     return response_data
+def include_event_history_in_trajectories(
+    response_data: dict[str, Any],
+    messages_by_trajectory: list[list[dict[str, Any]]] | None = None,
+    responses_by_trajectory: list[dict[str, Any]] | None = None,
+    *,
+    run_id: str,
+    correlation_id: str | None = None,
+) -> dict[str, Any]:
+    """Backward-compatible alias for include_event_history_in_response."""
+    messages = messages_by_trajectory[0] if messages_by_trajectory else None
+    response = responses_by_trajectory[0] if responses_by_trajectory else None
+    return include_event_history_in_response(
+        response_data,
+        messages=messages,
+        response=response,
+        run_id=run_id,
+        correlation_id=correlation_id,
+    )
 def verify_trace_correlation_id_in_response(
     response_data: dict[str, Any],
     expected_correlation_id: str | None,
@@ -480,15 +522,24 @@ def verify_trace_correlation_id_in_response(
             f"expected={expected_correlation_id} actual={pipeline_meta.get('trace_correlation_id') if isinstance(pipeline_meta, dict) else 'NOT_A_DICT'}"
         )
-    # Check trajectories
-    trajectories = response_data.get("trajectories", [])
-    if isinstance(trajectories, list):
-        for idx, traj in enumerate(trajectories):
-            if isinstance(traj, dict) and traj.get("trace_correlation_id") != expected_correlation_id:
-                errors.append(
-                    f"trajectory[{idx}] missing or mismatch: "
-                    f"expected={expected_correlation_id} actual={traj.get('trace_correlation_id')}"
-                )
+    # Check trace metadata
+    trace_block = response_data.get("trace")
+    trace_meta_id = None
+    if isinstance(trace_block, dict):
+        trace_meta = trace_block.get("metadata")
+        if isinstance(trace_meta, dict):
+            trace_meta_id = trace_meta.get("trace_correlation_id")
+        if trace_meta_id != expected_correlation_id:
+            session_trace = trace_block.get("session_trace")
+            if isinstance(session_trace, dict):
+                session_meta = session_trace.get("metadata")
+                if isinstance(session_meta, dict):
+                    trace_meta_id = session_meta.get("trace_correlation_id")
+        if trace_meta_id != expected_correlation_id:
+            errors.append(
+                "trace.metadata missing or mismatch: "
+                f"expected={expected_correlation_id} actual={trace_meta_id}"
+            )
     if errors:
         logger.error(

synth_ai/sdk/task/validators.py CHANGED Viewed

@@ -16,8 +16,8 @@ def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only
     """Validate that a task app rollout response has required fields for RL training.
     The backend RL trainer requires:
-    1. pipeline_metadata["inference_url"] at top level (with ?cid= for trace correlation)
-    2. Each step's info.meta["inference_url"] must be present (nested structure!)
+    1. A v3 trace with event_history (preferred), OR
+    2. pipeline_metadata["inference_url"] with ?cid= for trace hydration fallback
     Args:
         response_data: The rollout response dict from task app
@@ -31,16 +31,43 @@ def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only
     """
     issues = []
-    # Check pipeline_metadata
+    trace_block = response_data.get("trace")
+    event_history = None
+    if isinstance(trace_block, dict):
+        event_history = trace_block.get("event_history")
+        if not event_history and isinstance(trace_block.get("session_trace"), dict):
+            event_history = trace_block["session_trace"].get("event_history")
+    has_event_history = isinstance(event_history, list) and len(event_history) > 0
+    trace_correlation_id = response_data.get("trace_correlation_id")
+    if not trace_correlation_id and isinstance(trace_block, dict):
+        trace_meta = trace_block.get("metadata")
+        if isinstance(trace_meta, dict):
+            trace_correlation_id = trace_meta.get("trace_correlation_id")
+    if not trace_correlation_id:
+        issues.append(
+            "Missing trace_correlation_id (top-level or trace.metadata). "
+            "RL trainer requires this to link traces."
+        )
+    if not has_event_history:
+        issues.append(
+            "trace.event_history is missing or empty. "
+            "Return a v3 trace or provide inference_url for hydration."
+        )
+    # Check pipeline_metadata inference_url only when trace is missing/empty
     pipeline_metadata = response_data.get("pipeline_metadata")
-    if not isinstance(pipeline_metadata, dict):
-        issues.append("Missing or invalid 'pipeline_metadata' (required for RL training)")
-    else:
+    inference_url = None
+    if isinstance(pipeline_metadata, dict):
         inference_url = pipeline_metadata.get("inference_url")
+    if not has_event_history:
         if not inference_url:
             issues.append(
                 "pipeline_metadata['inference_url'] is missing. "
-                "RL trainer requires this field to extract traces."
+                "RL trainer needs this to hydrate traces when event_history is absent."
             )
         elif not isinstance(inference_url, str):
             issues.append(
@@ -52,48 +79,6 @@ def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only
                 f"Got: {inference_url[:80]}..."
             )
-    # Check trajectories and steps
-    trajectories = response_data.get("trajectories", [])
-    if not trajectories:
-        issues.append("No trajectories found in response")
-    for traj_idx, trajectory in enumerate(trajectories):
-        if not isinstance(trajectory, dict):
-            continue
-        steps = trajectory.get("steps", [])
-        for step_idx, step in enumerate(steps):
-            if not isinstance(step, dict):
-                continue
-            step_info = step.get("info", {})
-            if not isinstance(step_info, dict):
-                issues.append(
-                    f"trajectory[{traj_idx}].steps[{step_idx}].info is not a dict"
-                )
-                continue
-            # Check for nested meta.inference_url (backend expects this structure!)
-            step_meta = step_info.get("meta", {})
-            if not isinstance(step_meta, dict):
-                issues.append(
-                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta is missing or not a dict. "
-                    f"RL trainer expects nested structure: info.meta.inference_url"
-                )
-                continue
-            step_inference_url = step_meta.get("inference_url")
-            if not step_inference_url:
-                issues.append(
-                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] is missing. "
-                    f"RL trainer needs this for trace extraction (nested structure required!)"
-                )
-            elif not isinstance(step_inference_url, str):
-                issues.append(
-                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] must be a string, "
-                    f"got: {type(step_inference_url).__name__}"
-                )
     if issues and not warn_only:
         error_msg = "Task app response validation failed for RL training:\n" + "\n".join(
             f"  - {issue}" for issue in issues

synth_ai/sdk/training/__init__.py CHANGED Viewed

@@ -4,7 +4,7 @@ This module provides high-level APIs for running training jobs:
 - PromptLearningJob: GEPA and MIPRO prompt optimization
 - SFTJob: Supervised fine-tuning
 - RLJob: Reinforcement learning (GSPO, GRPO, PPO, etc.)
-- GraphGenJob: Automated Design of Agentic Systems (simplified workflows API)
+- GraphGenJob: Graph Opt (simplified workflows API)
 Example:
     from synth_ai.sdk.training import PromptLearningJob, RLJob, GraphGenJob
@@ -30,7 +30,7 @@ Example:
 from __future__ import annotations
 # Pollers and utilities
-from synth_ai.sdk.api.train.pollers import JobPoller, PollOutcome, RLJobPoller
+from synth_ai.sdk.api.train.pollers import JobPoller, PollOutcome, RLJobPoller, EvalJobPoller
 # Re-export from existing locations
 from synth_ai.sdk.api.train.prompt_learning import (
@@ -41,7 +41,7 @@ from synth_ai.sdk.api.train.prompt_learning import (
 from synth_ai.sdk.api.train.rl import RLJob, RLJobConfig
 from synth_ai.sdk.api.train.sft import SFTJob
-# GraphGen (formerly GraphGen)
+# GraphGen (Graph Opt)
 from synth_ai.sdk.api.train.graphgen import GraphGenJob, GraphGenJobResult, GraphGenSubmitResult
 from synth_ai.sdk.api.train.graphgen_models import (
     GraphGenJobConfig,
@@ -49,16 +49,7 @@ from synth_ai.sdk.api.train.graphgen_models import (
     GraphGenTask,
     GraphGenGoldOutput,
     GraphGenRubric,
-    GraphGenJudgeConfig,
-    load_graphgen_taskset,
-    parse_graphgen_taskset,
-    # GraphGen aliases
-    GraphGenJobConfig,
-    GraphGenTaskSet,
-    GraphGenTask,
-    GraphGenGoldOutput,
-    GraphGenRubric,
-    GraphGenJudgeConfig,
+    GraphGenVerifierConfig,
     load_graphgen_taskset,
     parse_graphgen_taskset,
 )
@@ -80,7 +71,7 @@ __all__ = [
     "GraphGenTask",
     "GraphGenGoldOutput",
     "GraphGenRubric",
-    "GraphGenJudgeConfig",
+    "GraphGenVerifierConfig",
     "load_graphgen_taskset",
     "parse_graphgen_taskset",
     # GraphGen (legacy aliases)
@@ -92,11 +83,11 @@ __all__ = [
     "GraphGenTask",
     "GraphGenGoldOutput",
     "GraphGenRubric",
-    "GraphGenJudgeConfig",
+    "GraphGenVerifierConfig",
     "load_graphgen_taskset",
     "parse_graphgen_taskset",
     # Utils
     "JobPoller",
     "PollOutcome",
+    "EvalJobPoller",
 ]

synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

Potentially problematic release.

synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl