synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (110) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +5 -4
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +1 -0
  19. examples/swe/task_app/hosted/rollout.py +2 -0
  20. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  21. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  22. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  23. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  24. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  25. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  26. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  27. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  28. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  29. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  30. examples/task_apps/crafter/task_app/__init__.py +3 -0
  31. examples/task_apps/crafter/task_app/grpo_crafter.py +306 -8
  32. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  33. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +16 -3
  34. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  35. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  36. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +52 -1
  37. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +111 -13
  38. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  39. examples/task_apps/enron/filter_sft.toml +5 -0
  40. examples/task_apps/enron/tests/__init__.py +2 -0
  41. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  42. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  43. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  44. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  45. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  46. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  47. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  48. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  49. examples/task_apps/pokemon_red/task_app.py +199 -6
  50. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  51. examples/task_apps/sokoban/filter_sft.toml +5 -0
  52. examples/task_apps/sokoban/tests/__init__.py +2 -0
  53. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  54. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  55. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  56. examples/task_apps/verilog/filter_sft.toml +5 -0
  57. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  58. examples/task_apps/verilog/tests/__init__.py +2 -0
  59. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  60. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  61. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  62. examples/warming_up_to_rl/groq_test.py +2 -0
  63. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  64. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  65. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  66. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  67. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  68. synth_ai/api/models/supported.py +1 -0
  69. synth_ai/cli/__init__.py +46 -13
  70. synth_ai/cli/_modal_wrapper.py +3 -2
  71. synth_ai/cli/recent.py +1 -1
  72. synth_ai/cli/status.py +1 -1
  73. synth_ai/cli/task_apps.py +354 -143
  74. synth_ai/cli/traces.py +1 -1
  75. synth_ai/cli/tui.py +57 -0
  76. synth_ai/cli/turso.py +1 -1
  77. synth_ai/cli/watch.py +1 -1
  78. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  79. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  80. synth_ai/environments/examples/verilog/engine.py +76 -10
  81. synth_ai/judge_schemas.py +8 -8
  82. synth_ai/task/__init__.py +11 -1
  83. synth_ai/task/apps/__init__.py +1 -0
  84. synth_ai/task/config.py +257 -0
  85. synth_ai/task/contracts.py +15 -2
  86. synth_ai/task/rubrics/__init__.py +3 -0
  87. synth_ai/task/rubrics/loaders.py +22 -3
  88. synth_ai/task/rubrics/scoring.py +3 -0
  89. synth_ai/task/trace_correlation_helpers.py +315 -0
  90. synth_ai/task/validators.py +144 -0
  91. synth_ai/tracing_v3/abstractions.py +3 -3
  92. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  93. synth_ai/tracing_v3/session_tracer.py +16 -6
  94. synth_ai/tracing_v3/storage/base.py +29 -29
  95. synth_ai/tracing_v3/storage/config.py +3 -3
  96. synth_ai/tracing_v3/turso/daemon.py +8 -7
  97. synth_ai/tracing_v3/turso/native_manager.py +63 -40
  98. synth_ai/tracing_v3/utils.py +3 -3
  99. synth_ai/tui/__init__.py +5 -0
  100. synth_ai/tui/__main__.py +13 -0
  101. synth_ai/tui/cli/__init__.py +1 -0
  102. synth_ai/tui/cli/query_experiments.py +164 -0
  103. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  104. synth_ai/tui/dashboard.py +906 -0
  105. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/METADATA +1 -1
  106. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/RECORD +110 -71
  107. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  108. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  109. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  110. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,18 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  from dataclasses import dataclass
4
+ from enum import Enum
4
5
  from typing import Any, Literal
5
6
 
6
7
  from pydantic import BaseModel, ConfigDict, Field
7
8
 
8
9
 
10
class RolloutMode(str, Enum):
    """Mode controls how rollout infrastructure processes inference URLs."""

    # RL training rollouts: trace correlation with the trainer is expected,
    # so missing correlation IDs are surfaced as warnings downstream.
    RL = "rl"
    # Evaluation rollouts: a missing trace correlation ID is expected and
    # only logged at debug level downstream.
    EVAL = "eval"
14
+
15
+
9
16
  @dataclass(frozen=True)
10
17
  class TaskAppEndpoints:
11
18
  """Required Task App endpoints used by RL trainers and clients.
@@ -43,7 +50,7 @@ class RolloutRecordConfig(BaseModel):
43
50
  logprobs: bool = False
44
51
  value: bool = False
45
52
  return_trace: bool = False
46
- trace_format: Literal["compact", "full"] = "compact"
53
+ trace_format: Literal["compact", "full", "structured"] = "compact"
47
54
 
48
55
 
49
56
  class RolloutSafetyConfig(BaseModel):
@@ -61,6 +68,7 @@ class RolloutRequest(BaseModel):
61
68
  safety: RolloutSafetyConfig = RolloutSafetyConfig()
62
69
  training_session_id: str | None = None
63
70
  synth_base_url: str | None = None
71
+ mode: RolloutMode # Required: explicit RL vs EVAL mode
64
72
 
65
73
 
66
74
  class RolloutStep(BaseModel):
@@ -110,7 +118,7 @@ class RolloutTrajectory(BaseModel):
110
118
 
111
119
  # Required for trace correlation with inference mesh (optional initially for backward compat)
112
120
  # See: monorepo/INFERENCE_URL_REQUIREMENT_PLAN.md and trace_creation_and_judgement.txt
113
- inference_url: str | None = None
121
+ inference_url: str
114
122
 
115
123
  decision_samples: list[dict[str, Any]] | None = None
116
124
 
@@ -143,10 +151,15 @@ class RolloutResponse(BaseModel):
143
151
  aborted: bool = False
144
152
  ops_executed: int = 0
145
153
 
154
+ # OPTIONAL: correlation ID for linking rollout to inference traces
155
+ # If not provided, trainer will infer it from trajectory.inference_url ?cid=... parameter
156
+ trace_correlation_id: str | None = None
157
+
146
158
  # PREFERRED: v3 trace format (SessionTrace). This is the single source of truth
147
159
  # for rollout data and should be used by all new code. Contains richer data than
148
160
  # trajectories including token IDs, logprobs, timing, and multimodal content.
149
161
  trace: dict[str, Any] | None = None
162
+ pipeline_metadata: dict[str, Any] = Field(default_factory=dict)
150
163
 
151
164
 
152
165
  class _ExtraAllowModel(BaseModel):
@@ -51,3 +51,6 @@ __all__ = [
51
51
  RubricCriterion = StrictCriterion
52
52
  RubricSpec = StrictRubric
53
53
 
54
+
55
+
56
+
@@ -60,15 +60,34 @@ def load_rubric(source: str | dict[str, Any] | Rubric | None) -> Rubric | None:
60
60
 
61
61
  Returns:
62
62
  Parsed Rubric instance or None if source is None
63
+
64
+ Raises:
65
+ ValueError: If the rubric format is incorrect (e.g., backend judge format)
66
+ ValidationError: If the rubric fails schema validation
63
67
  """
64
68
  if source is None:
65
69
  return None
66
70
  if isinstance(source, Rubric):
67
71
  return source
72
+
73
+ # Load and parse the data
68
74
  if isinstance(source, dict):
69
- return Rubric.model_validate(source)
70
- text, suffix = _load_text(str(source))
71
- data = _parse_structured(text, suffix)
75
+ data = source
76
+ else:
77
+ text, suffix = _load_text(str(source))
78
+ data = _parse_structured(text, suffix)
79
+
80
+ # Check if this looks like a backend judge rubric (wrong format)
81
+ if isinstance(data, dict) and "event" in data and "outcome" in data:
82
+ # Missing required task app rubric fields
83
+ if "version" not in data and "goal_text" not in data and "criteria" not in data:
84
+ source_hint = f" ({source})" if isinstance(source, str) else ""
85
+ raise ValueError(
86
+ f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
87
+ f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
88
+ f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
89
+ )
90
+
72
91
  return Rubric.model_validate(data)
73
92
 
74
93
 
@@ -111,3 +111,6 @@ def score_outcome_against_rubric(outcome: dict[str, Any], rubric: Rubric | None)
111
111
  values[str(key)] = score
112
112
  return _score(rubric.criteria, values, rubric.aggregation)
113
113
 
114
+
115
+
116
+
@@ -0,0 +1,315 @@
1
+ """Helpers for trace correlation ID extraction and inclusion in task apps.
2
+
3
+ This module provides utilities for task apps to:
4
+ 1. Extract trace_correlation_id from rollout requests
5
+ 2. Include trace_correlation_id in rollout responses (3 required locations)
6
+
7
+ See monorepo/trace_creation_and_judgement.txt "Fatal Guards" section for requirements.
8
+ """
9
+
10
+ import logging
11
+ from typing import Any
12
+ from urllib.parse import parse_qs, urlparse
13
+
14
+ logger = logging.getLogger(__name__)
15
+
16
+
17
+ def extract_trace_correlation_id(
18
+ policy_config: dict[str, Any],
19
+ inference_url: str | None = None,
20
+ mode: Any = None
21
+ ) -> str | None:
22
+ """
23
+ Extract trace_correlation_id from policy config or inference URL.
24
+
25
+ This is the standardized method for all task apps to extract the correlation ID
26
+ that the RL trainer generates and passes to the task app.
27
+
28
+ Args:
29
+ policy_config: Policy configuration dict from RolloutRequest.policy.config
30
+ inference_url: Inference URL (optional, used as fallback)
31
+ mode: RolloutMode or string ("rl" or "eval"). Controls warning behavior -
32
+ warnings only logged for RL mode, not EVAL mode.
33
+
34
+ Returns:
35
+ trace_correlation_id if found, None otherwise
36
+
37
+ Extraction order:
38
+ 1. policy_config["trace_correlation_id"] (preferred)
39
+ 2. policy_config["trace"] (legacy fallback)
40
+ 3. URL query param ?cid=... (fallback)
41
+ 4. URL query param ?trace_correlation_id=... (fallback)
42
+ """
43
+ # Try policy_config first (preferred method)
44
+ candidates: list[Any] = [
45
+ policy_config.get("trace_correlation_id"),
46
+ policy_config.get("trace"),
47
+ ]
48
+
49
+ logger.debug(
50
+ "extract_trace_correlation_id: policy_cfg keys=%s candidates=%s",
51
+ sorted(policy_config.keys()),
52
+ candidates,
53
+ )
54
+
55
+ for candidate in candidates:
56
+ if isinstance(candidate, str):
57
+ stripped = candidate.strip()
58
+ if stripped:
59
+ logger.info(
60
+ "extract_trace_correlation_id: extracted from policy_config=%s",
61
+ stripped
62
+ )
63
+ return stripped
64
+
65
+ # Determine if we're in EVAL mode (trace_correlation_id not required for eval)
66
+ try:
67
+ from synth_ai.task.contracts import RolloutMode
68
+ is_eval_mode = (mode == "eval" or mode == RolloutMode.EVAL or
69
+ (hasattr(mode, 'value') and mode.value == "eval"))
70
+ except ImportError:
71
+ # If RolloutMode not available, fall back to string comparison
72
+ is_eval_mode = (mode == "eval")
73
+
74
+ # Fallback: try to extract from inference_url query params
75
+ if not inference_url or not isinstance(inference_url, str):
76
+ if is_eval_mode:
77
+ logger.debug(
78
+ "extract_trace_correlation_id: no correlation ID found in policy_config "
79
+ "and no inference_url provided (EVAL mode - expected)"
80
+ )
81
+ else:
82
+ logger.warning(
83
+ "extract_trace_correlation_id: no correlation ID found in policy_config "
84
+ "and no inference_url provided"
85
+ )
86
+ return None
87
+
88
+ try:
89
+ parsed = urlparse(inference_url)
90
+ query_params = parse_qs(parsed.query or "")
91
+ # Try multiple possible query param names
92
+ for param_name in ["cid", "trace_correlation_id", "trace"]:
93
+ values = query_params.get(param_name, [])
94
+ for value in values:
95
+ if isinstance(value, str) and value.strip():
96
+ correlation_id = value.strip()
97
+ logger.info(
98
+ "extract_trace_correlation_id: extracted from URL param %s=%s",
99
+ param_name,
100
+ correlation_id,
101
+ )
102
+ return correlation_id
103
+ except Exception as e:
104
+ logger.warning(
105
+ "extract_trace_correlation_id: failed to parse inference_url=%s error=%s",
106
+ inference_url,
107
+ e,
108
+ )
109
+
110
+ if is_eval_mode:
111
+ logger.debug(
112
+ "extract_trace_correlation_id: no trace_correlation_id found in "
113
+ "policy_config or inference_url=%s (EVAL mode - expected)",
114
+ inference_url,
115
+ )
116
+ else:
117
+ logger.warning(
118
+ "extract_trace_correlation_id: no trace_correlation_id found in "
119
+ "policy_config or inference_url=%s",
120
+ inference_url,
121
+ )
122
+ return None
123
+
124
+
125
+ def validate_trace_correlation_id(
126
+ trace_correlation_id: str | None,
127
+ run_id: str,
128
+ policy_config: dict[str, Any],
129
+ fatal: bool = False
130
+ ) -> str | None:
131
+ """
132
+ Validate that trace_correlation_id was successfully extracted.
133
+
134
+ Args:
135
+ trace_correlation_id: The extracted correlation ID (or None)
136
+ run_id: Rollout run_id for logging
137
+ policy_config: Policy configuration for debugging
138
+ fatal: If True, raise ValueError on missing ID. If False, log error only.
139
+
140
+ Returns:
141
+ trace_correlation_id if present, None if missing (when fatal=False)
142
+
143
+ Raises:
144
+ ValueError: If trace_correlation_id is missing and fatal=True
145
+ """
146
+ if not trace_correlation_id:
147
+ error_msg = (
148
+ f"🚨 CRITICAL: Cannot extract trace_correlation_id!\n"
149
+ "\n"
150
+ f"Run ID: {run_id}\n"
151
+ f"Policy config keys: {sorted(policy_config.keys())}\n"
152
+ f"Inference URL: {policy_config.get('inference_url', 'NOT_SET')}\n"
153
+ "\n"
154
+ "Checked:\n"
155
+ f"1. policy_config['trace_correlation_id']: {policy_config.get('trace_correlation_id')}\n"
156
+ f"2. policy_config['trace']: {policy_config.get('trace')}\n"
157
+ f"3. inference_url query params\n"
158
+ "\n"
159
+ "Task app CANNOT proceed without trace_correlation_id.\n"
160
+ "This indicates the RL trainer is not sending it correctly.\n"
161
+ "\n"
162
+ "See monorepo/trace_creation_and_judgement.txt 'Fatal Guards' section.\n"
163
+ )
164
+
165
+ if fatal:
166
+ raise ValueError(error_msg)
167
+ else:
168
+ logger.error(error_msg)
169
+
170
+ return trace_correlation_id
171
+
172
+
173
+ def include_trace_correlation_id_in_response(
174
+ response_data: dict[str, Any],
175
+ trace_correlation_id: str | None,
176
+ run_id: str
177
+ ) -> dict[str, Any]:
178
+ """
179
+ Include trace_correlation_id in all required locations of rollout response.
180
+
181
+ Required locations (per Fatal Guards section):
182
+ 1. Top-level response["trace_correlation_id"]
183
+ 2. response["pipeline_metadata"]["trace_correlation_id"]
184
+ 3. Each trajectory["trace_correlation_id"]
185
+
186
+ Args:
187
+ response_data: RolloutResponse dict (from .model_dump())
188
+ trace_correlation_id: The correlation ID to include
189
+ run_id: Rollout run_id for logging
190
+
191
+ Returns:
192
+ Modified response_data with trace_correlation_id in all required places
193
+ """
194
+ if not trace_correlation_id:
195
+ logger.error(
196
+ "include_trace_correlation_id_in_response: missing trace_correlation_id "
197
+ "for run_id=%s - cannot include in response",
198
+ run_id
199
+ )
200
+ return response_data
201
+
202
+ # 1. Add to top-level (REQUIRED)
203
+ if "trace_correlation_id" not in response_data:
204
+ response_data["trace_correlation_id"] = trace_correlation_id
205
+ logger.info(
206
+ "include_trace_correlation_id: added to top-level run_id=%s cid=%s",
207
+ run_id,
208
+ trace_correlation_id
209
+ )
210
+
211
+ # 2. Add to pipeline_metadata (REQUIRED)
212
+ pipeline_meta = response_data.get("pipeline_metadata")
213
+ if not isinstance(pipeline_meta, dict):
214
+ pipeline_meta = {}
215
+ response_data["pipeline_metadata"] = pipeline_meta
216
+
217
+ if "trace_correlation_id" not in pipeline_meta:
218
+ pipeline_meta["trace_correlation_id"] = trace_correlation_id
219
+ logger.info(
220
+ "include_trace_correlation_id: added to pipeline_metadata run_id=%s cid=%s",
221
+ run_id,
222
+ trace_correlation_id
223
+ )
224
+
225
+ # 3. Add to each trajectory (REQUIRED)
226
+ trajectories = response_data.get("trajectories", [])
227
+ if isinstance(trajectories, list):
228
+ for idx, traj in enumerate(trajectories):
229
+ if isinstance(traj, dict) and "trace_correlation_id" not in traj:
230
+ traj["trace_correlation_id"] = trace_correlation_id
231
+ logger.debug(
232
+ "include_trace_correlation_id: added to trajectory[%d] run_id=%s cid=%s",
233
+ idx,
234
+ run_id,
235
+ trace_correlation_id
236
+ )
237
+
238
+ logger.info(
239
+ "include_trace_correlation_id: completed run_id=%s cid=%s "
240
+ "added to %d locations (top-level, metadata, %d trajectories)",
241
+ run_id,
242
+ trace_correlation_id,
243
+ 2 + len(trajectories),
244
+ len(trajectories)
245
+ )
246
+
247
+ return response_data
248
+
249
+
250
+ def verify_trace_correlation_id_in_response(
251
+ response_data: dict[str, Any],
252
+ expected_correlation_id: str | None,
253
+ run_id: str
254
+ ) -> bool:
255
+ """
256
+ Verify that trace_correlation_id is present in all required locations.
257
+
258
+ Args:
259
+ response_data: RolloutResponse dict to verify
260
+ expected_correlation_id: The correlation ID that should be present
261
+ run_id: Rollout run_id for logging
262
+
263
+ Returns:
264
+ True if all required locations have the correlation ID, False otherwise
265
+ """
266
+ if not expected_correlation_id:
267
+ logger.error(
268
+ "verify_trace_correlation_id: no expected_correlation_id provided for run_id=%s",
269
+ run_id
270
+ )
271
+ return False
272
+
273
+ errors = []
274
+
275
+ # Check top-level
276
+ if response_data.get("trace_correlation_id") != expected_correlation_id:
277
+ errors.append(
278
+ f"Top-level missing or mismatch: "
279
+ f"expected={expected_correlation_id} actual={response_data.get('trace_correlation_id')}"
280
+ )
281
+
282
+ # Check pipeline_metadata
283
+ pipeline_meta = response_data.get("pipeline_metadata", {})
284
+ if not isinstance(pipeline_meta, dict) or pipeline_meta.get("trace_correlation_id") != expected_correlation_id:
285
+ errors.append(
286
+ f"pipeline_metadata missing or mismatch: "
287
+ f"expected={expected_correlation_id} actual={pipeline_meta.get('trace_correlation_id') if isinstance(pipeline_meta, dict) else 'NOT_A_DICT'}"
288
+ )
289
+
290
+ # Check trajectories
291
+ trajectories = response_data.get("trajectories", [])
292
+ if isinstance(trajectories, list):
293
+ for idx, traj in enumerate(trajectories):
294
+ if isinstance(traj, dict) and traj.get("trace_correlation_id") != expected_correlation_id:
295
+ errors.append(
296
+ f"trajectory[{idx}] missing or mismatch: "
297
+ f"expected={expected_correlation_id} actual={traj.get('trace_correlation_id')}"
298
+ )
299
+
300
+ if errors:
301
+ logger.error(
302
+ "verify_trace_correlation_id: FAILED run_id=%s\n%s",
303
+ run_id,
304
+ "\n".join(errors)
305
+ )
306
+ return False
307
+
308
+ logger.info(
309
+ "verify_trace_correlation_id: PASSED run_id=%s cid=%s",
310
+ run_id,
311
+ expected_correlation_id
312
+ )
313
+ return True
314
+
315
+
@@ -4,6 +4,7 @@ from __future__ import annotations
4
4
 
5
5
  import re
6
6
  from typing import Any
7
+ from urllib.parse import urlparse, urlunparse
7
8
 
8
9
  import click
9
10
  import httpx
@@ -11,6 +12,149 @@ import httpx
11
12
  from synth_ai.task.contracts import TaskAppEndpoints # type: ignore[attr-defined]
12
13
 
13
14
 
15
def validate_rollout_response_for_rl(response_data: dict[str, Any], *, warn_only: bool = False) -> list[str]:
    """Validate that a task app rollout response has required fields for RL training.

    The backend RL trainer requires:
    1. pipeline_metadata["inference_url"] at top level (with ?cid= for trace correlation)
    2. Each step's info.meta["inference_url"] must be present (nested structure!)

    Args:
        response_data: The rollout response dict from task app
        warn_only: If True, return warnings instead of raising exceptions

    Returns:
        List of validation warnings/errors

    Raises:
        ValueError: If critical fields are missing (unless warn_only=True)
    """
    issues: list[str] = []

    # Top-level pipeline_metadata and its inference_url.
    pipeline_metadata = response_data.get("pipeline_metadata")
    if not isinstance(pipeline_metadata, dict):
        issues.append("Missing or invalid 'pipeline_metadata' (required for RL training)")
    else:
        inference_url = pipeline_metadata.get("inference_url")
        if not inference_url:
            issues.append(
                "pipeline_metadata['inference_url'] is missing. "
                "RL trainer requires this field to extract traces."
            )
        elif not isinstance(inference_url, str):
            issues.append(
                f"pipeline_metadata['inference_url'] must be a string, got: {type(inference_url).__name__}"
            )
        elif "?cid=" not in inference_url:
            issues.append(
                f"pipeline_metadata['inference_url'] should contain '?cid=' for trace correlation. "
                f"Got: {inference_url[:80]}..."
            )

    # Per-step inference URLs inside each trajectory.
    trajectories = response_data.get("trajectories", [])
    if not trajectories:
        issues.append("No trajectories found in response")

    for traj_idx, trajectory in enumerate(trajectories):
        if not isinstance(trajectory, dict):
            continue
        for step_idx, step in enumerate(trajectory.get("steps", [])):
            if not isinstance(step, dict):
                continue

            step_info = step.get("info", {})
            if not isinstance(step_info, dict):
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info is not a dict"
                )
                continue

            # Backend expects the URL nested under info.meta, not info itself.
            step_meta = step_info.get("meta", {})
            if not isinstance(step_meta, dict):
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta is missing or not a dict. "
                    f"RL trainer expects nested structure: info.meta.inference_url"
                )
                continue

            step_inference_url = step_meta.get("inference_url")
            if not step_inference_url:
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] is missing. "
                    f"RL trainer needs this for trace extraction (nested structure required!)"
                )
            elif not isinstance(step_inference_url, str):
                issues.append(
                    f"trajectory[{traj_idx}].steps[{step_idx}].info.meta['inference_url'] must be a string, "
                    f"got: {type(step_inference_url).__name__}"
                )

    if issues and not warn_only:
        raise ValueError(
            "Task app response validation failed for RL training:\n"
            + "\n".join(f" - {issue}" for issue in issues)
        )
    return issues
104
+
105
+
106
+ def normalize_inference_url(url: str | None, *, default: str = "https://api.openai.com/v1/chat/completions") -> str:
107
+ """Normalize an inference URL to include the /v1/chat/completions path.
108
+
109
+ This utility ensures inference URLs have the correct path structure for OpenAI-compatible
110
+ chat completions endpoints, while preserving query parameters (e.g., ?cid=trace_123)
111
+ that may be added for tracing.
112
+
113
+ Args:
114
+ url: The inference URL to normalize (may be None or incomplete)
115
+ default: Default URL to use if url is None/empty
116
+
117
+ Returns:
118
+ Normalized URL with proper path and preserved query parameters
119
+
120
+ Examples:
121
+ >>> normalize_inference_url("https://api.groq.com")
122
+ 'https://api.groq.com/v1/chat/completions'
123
+
124
+ >>> normalize_inference_url("https://modal.host?cid=trace_123")
125
+ 'https://modal.host/v1/chat/completions?cid=trace_123'
126
+
127
+ >>> normalize_inference_url("https://api.openai.com/v1")
128
+ 'https://api.openai.com/v1/chat/completions'
129
+
130
+ >>> normalize_inference_url("https://api.groq.com/openai/v1/chat/completions")
131
+ 'https://api.groq.com/openai/v1/chat/completions'
132
+ """
133
+ candidate = (url or default).strip()
134
+ if not candidate:
135
+ candidate = default
136
+
137
+ # Parse the URL to separate path and query components
138
+ parsed = urlparse(candidate)
139
+
140
+ # Check if path already ends with a completions endpoint
141
+ path = parsed.path.rstrip('/')
142
+ if path.endswith("/v1/chat/completions") or path.endswith("/chat/completions"):
143
+ return candidate
144
+
145
+ # Determine what to append based on existing path
146
+ if path.endswith("/v1"):
147
+ new_path = f"{path}/chat/completions"
148
+ elif path.endswith("/chat"):
149
+ new_path = f"{path}/completions"
150
+ else:
151
+ # Default: append full path
152
+ new_path = f"{path}/v1/chat/completions" if path else "/v1/chat/completions"
153
+
154
+ # Reconstruct URL with new path and original query/fragment
155
+ return urlunparse(parsed._replace(path=new_path))
156
+
157
+
14
158
  def validate_task_app_url(url: str | None) -> str:
15
159
  """Validate and normalize a task app URL.
16
160
 
@@ -37,7 +37,7 @@ Concepts:
37
37
  from __future__ import annotations
38
38
 
39
39
  from dataclasses import asdict, dataclass, field
40
- from datetime import UTC, datetime
40
+ from datetime import datetime, timezone
41
41
  from typing import Any
42
42
 
43
43
  from .lm_call_record_abstractions import LLMCallRecord
@@ -249,7 +249,7 @@ class SessionTimeStep:
249
249
 
250
250
  step_id: str = ""
251
251
  step_index: int = 0
252
- timestamp: datetime = field(default_factory=lambda: datetime.now(UTC))
252
+ timestamp: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
253
253
  turn_number: int | None = None
254
254
  events: list[BaseEvent] = field(default_factory=list)
255
255
  markov_blanket_messages: list[SessionEventMarkovBlanketMessage] = field(default_factory=list)
@@ -283,7 +283,7 @@ class SessionTrace:
283
283
  """
284
284
 
285
285
  session_id: str = ""
286
- created_at: datetime = field(default_factory=lambda: datetime.now(UTC))
286
+ created_at: datetime = field(default_factory=lambda: datetime.now(timezone.utc))
287
287
  session_time_steps: list[SessionTimeStep] = field(default_factory=list)
288
288
  event_history: list[BaseEvent] = field(default_factory=list)
289
289
  markov_blanket_message_history: list[SessionEventMarkovBlanketMessage] = field(
@@ -8,7 +8,7 @@ from __future__ import annotations
8
8
 
9
9
  import uuid
10
10
  from dataclasses import dataclass, field
11
- from datetime import UTC, datetime
11
+ from datetime import datetime, timezone
12
12
  from typing import Any, TypedDict, cast
13
13
 
14
14
  from .lm_call_record_abstractions import (
@@ -180,8 +180,8 @@ def create_llm_call_record_from_response(
180
180
  api_type=api_type,
181
181
  provider=provider,
182
182
  model_name=model_name,
183
- started_at=started_at or datetime.now(UTC),
184
- completed_at=completed_at or datetime.now(UTC),
183
+ started_at=started_at or datetime.now(timezone.utc),
184
+ completed_at=completed_at or datetime.now(timezone.utc),
185
185
  latency_ms=latency_ms,
186
186
  request_params=params,
187
187
  input_messages=input_messages,
@@ -376,8 +376,8 @@ def create_llm_call_record_from_streaming(
376
376
  api_type="responses", # Streaming typically from Responses API
377
377
  provider=provider,
378
378
  model_name=model_name,
379
- started_at=started_at or datetime.now(UTC),
380
- completed_at=completed_at or datetime.now(UTC),
379
+ started_at=started_at or datetime.now(timezone.utc),
380
+ completed_at=completed_at or datetime.now(timezone.utc),
381
381
  latency_ms=latency_ms,
382
382
  request_params=params,
383
383
  input_messages=input_messages,