synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
synth_ai/core/env.py CHANGED
@@ -14,7 +14,7 @@ from typing import Literal
 from .errors import AuthenticationError, ConfigError
 
 # Default production URL
-PROD_BASE_URL = "https://www.api.usesynth.ai"
+PROD_BASE_URL = "https://api.usesynth.ai"
 PROD_BASE_URL_DEFAULT = PROD_BASE_URL  # Alias for backward compatibility
 
 
@@ -180,6 +180,8 @@ def get_backend_from_env() -> tuple[str, str]:
     direct_override = (os.environ.get("BACKEND_OVERRIDE") or "").strip()
     if direct_override:
         base = _normalize_url(direct_override)
+        if not base:
+            raise ConfigError("BACKEND_OVERRIDE is set but empty or invalid")
         api_key = os.environ.get("SYNTH_API_KEY", "").strip()
         return base, api_key
 
@@ -189,16 +191,25 @@ def get_backend_from_env() -> tuple[str, str]:
 
     if mode == "local":
         base = os.environ.get("LOCAL_BACKEND_URL", "http://localhost:8000")
+        # If explicitly set to empty string, use default
+        if not base or not base.strip():
+            base = "http://localhost:8000"
         key = os.environ.get("TESTING_LOCAL_SYNTH_API_KEY", "")
         return _normalize_url(base), key
 
     if mode == "dev":
         base = os.environ.get("DEV_BACKEND_URL", "") or "http://localhost:8000"
+        # If explicitly set to empty string, use default
+        if not base or not base.strip():
+            base = "http://localhost:8000"
         key = os.environ.get("DEV_SYNTH_API_KEY", "")
         return _normalize_url(base), key
 
     # prod
     base = os.environ.get("PROD_BACKEND_URL", PROD_BASE_URL)
+    # If explicitly set to empty string, use default
+    if not base or not base.strip():
+        base = PROD_BASE_URL
     key = (
         os.environ.get("PROD_SYNTH_API_KEY", "")
         or os.environ.get("TESTING_PROD_SYNTH_API_KEY", "")
synth_ai/core/errors.py CHANGED
@@ -87,7 +87,7 @@ class UsageLimitError(SynthError):
 
     Attributes:
         limit_type: The type of limit exceeded (e.g., "inference_tokens_per_day")
-        api: The API that hit the limit (e.g., "inference", "judges", "prompt_opt")
+        api: The API that hit the limit (e.g., "inference", "verifiers", "prompt_opt")
         current: Current usage value
         limit: The limit value
         tier: The org's tier (e.g., "free", "starter", "growth")
@@ -123,4 +123,3 @@ __all__ = [
    "ModelNotSupportedError",
    "UsageLimitError",
]
-
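Since the documented `api` values change from "judges" to "verifiers", callers that string-match on that attribute need updating. A hedged sketch of consuming the error (attribute names come from the docstring above; the triggering call is illustrative):

```python
from synth_ai.core.errors import UsageLimitError

try:
    ...  # any SDK call that can exceed an org usage limit
except UsageLimitError as e:
    # 0.4.4 reports e.api as "verifiers" where 0.4.1 used "judges"
    print(e.limit_type)  # e.g., "inference_tokens_per_day"
    print(e.api, e.current, e.limit, e.tier)
```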
synth_ai/core/integrations/cloudflare.py CHANGED
@@ -738,13 +738,20 @@ async def resolve_hostname_with_explicit_resolvers(hostname: str) -> str:
     timeout = float(os.getenv("SYNTH_TUNNEL_DNS_TIMEOUT_PER_ATTEMPT_SECS", "5"))
     loop = asyncio.get_event_loop()
 
-    # Try Cloudflare / Google first via `dig`, then fall back to system resolver
-    for resolver_ip in ("1.1.1.1", "8.8.8.8"):
+    # Try various dig resolvers, then fall back to system resolver
+    # Order: 1.1.1.1, 8.8.8.8, then plain dig (uses system's configured DNS but bypasses cache)
+    resolvers = [
+        ("1.1.1.1", ["dig", "@1.1.1.1", "+short", hostname]),
+        ("8.8.8.8", ["dig", "@8.8.8.8", "+short", hostname]),
+        ("default", ["dig", "+short", hostname]),  # Plain dig bypasses negative cache
+    ]
+
+    for resolver_name, cmd in resolvers:
         try:
             result = await loop.run_in_executor(
                 None,
-                lambda ip=resolver_ip: subprocess.run(
-                    ["dig", f"@{ip}", "+short", hostname],
+                lambda c=cmd: subprocess.run(
+                    c,
                     capture_output=True,
                     text=True,
                     timeout=timeout,
@@ -753,17 +760,17 @@ async def resolve_hostname_with_explicit_resolvers(hostname: str) -> str:
             if result.returncode == 0 and result.stdout.strip():
                 first = result.stdout.strip().splitlines()[0].strip()
                 if first:
-                    logger.debug(f"Resolved via {resolver_ip}: {hostname} -> {first}")
+                    logger.debug(f"Resolved via {resolver_name}: {hostname} -> {first}")
                     return first
         except FileNotFoundError:
-            logger.debug(f"dig not found, skipping {resolver_ip}")
+            logger.debug(f"dig not found, skipping {resolver_name}")
            continue
        except Exception as e:
-            logger.debug(f"Resolver {resolver_ip} failed: {e}")
+            logger.debug(f"Resolver {resolver_name} failed: {e}")
            continue
-
-    # Fallback: system resolver
-    logger.debug(f"Falling back to system resolver for {hostname}")
+
+    # Final fallback: system resolver (may hit negative cache)
+    logger.debug(f"Falling back to socket.gethostbyname for {hostname}")
     return await loop.run_in_executor(
         None,
         socket.gethostbyname,
@@ -822,13 +829,24 @@ async def verify_tunnel_dns_resolution(
         resolved_ip = await resolve_hostname_with_explicit_resolvers(hostname)
         logger.info(f"DNS resolution successful (attempt {attempt}): {hostname} -> {resolved_ip}")
 
-        # 2. HTTP connectivity: hit the tunnel via the resolved IP, but keep Host header.
-        # This avoids depending on the system resolver, which is what gave you EAI_NONAME.
+        # 2. HTTP connectivity: use curl with --resolve to bypass system DNS cache
+        # The system resolver may have negative-cached the hostname, so we use
+        # curl with explicit IP resolution to bypass it while maintaining proper SNI.
         try:
             scheme = parsed.scheme or "https"
-            test_url = f"{scheme}://{resolved_ip}/health"
-            headers = {"Host": hostname}
-
+            test_url = f"{scheme}://{hostname}/health"
+            port = 443 if scheme == "https" else 80
+
+            # Build curl command with --resolve to bypass system DNS
+            # Format: --resolve hostname:port:ip
+            curl_cmd = [
+                "curl", "-s", "-o", "/dev/null", "-w", "%{http_code}",
+                "--max-time", "5",
+                "-k",  # Allow self-signed certs
+                "--resolve", f"{hostname}:{port}:{resolved_ip}",
+                test_url,
+            ]
+
             # Include API key if provided (or from env var)
             if api_key is None:
                 # Try to load .env file if available
@@ -839,25 +857,34 @@ async def verify_tunnel_dns_resolution(
                     pass
                 api_key = os.getenv("ENVIRONMENT_API_KEY")
             if api_key:
-                headers["X-API-Key"] = api_key
-
-            # For Quick Tunnels, TLS cert is for *.trycloudflare.com, not the bare IP,
-            # so we disable verification here; this is just a readiness probe.
-            async with httpx.AsyncClient(timeout=5.0, verify=False) as client:
-                resp = await client.get(test_url, headers=headers)
-                # Accept 200 (OK), 400/401 (auth required - server is reachable), 404/405 (not found/method not allowed)
-                # All of these indicate the tunnel is working and the server is responding
-                if resp.status_code in (200, 400, 401, 404, 405):
-                    logger.info(f"HTTP connectivity verified via IP: {test_url} -> {resp.status_code}")
-                    return
+                curl_cmd.extend(["-H", f"X-API-Key: {api_key}"])
+
+            result = await loop.run_in_executor(
+                None,
+                lambda: subprocess.run(curl_cmd, capture_output=True, text=True, timeout=10),
+            )
+
+            status_code = int(result.stdout.strip()) if result.returncode == 0 and result.stdout.strip().isdigit() else 0
+
+            # Accept various status codes that indicate the tunnel is working:
+            # - 200: OK (service is running)
+            # - 400/401/403: Auth required (server is reachable)
+            # - 404/405: Not found / method not allowed (server is reachable)
+            # - 502: Bad gateway (cloudflared connected but local service isn't running)
+            if status_code in (200, 400, 401, 403, 404, 405, 502):
+                logger.info(f"HTTP connectivity verified: {test_url} -> {status_code}")
+                return
+            else:
+                # 530 errors are common when tunnel is still establishing - retry
+                if status_code == 530:
+                    logger.debug("HTTP 530 (tunnel establishing) - will retry")
+                    last_exc = RuntimeError("tunnel not ready yet (HTTP 530)")
+                elif result.returncode != 0:
+                    logger.warning(f"curl failed: {result.stderr}")
+                    last_exc = RuntimeError(f"curl failed: {result.stderr}")
                 else:
-                    # 530 errors are common when tunnel is still establishing - be lenient
-                    if resp.status_code == 530:
-                        logger.debug("HTTP 530 (tunnel establishing) - will retry")
-                        last_exc = RuntimeError("tunnel not ready yet (HTTP 530)")
-                    else:
-                        logger.warning(f"HTTP check returned unexpected status: {resp.status_code}")
-                        last_exc = RuntimeError(f"unexpected HTTP status {resp.status_code}")
+                    logger.warning(f"HTTP check returned unexpected status: {status_code}")
+                    last_exc = RuntimeError(f"unexpected HTTP status {status_code}")
         except Exception as http_exc:
             logger.warning(f"HTTP connectivity check failed (attempt {attempt}): {http_exc}")
             last_exc = http_exc
@@ -1102,6 +1129,155 @@ def open_managed_tunnel(tunnel_token: str) -> subprocess.Popen:
     )
 
 
+async def wait_for_cloudflared_connection(
+    proc: subprocess.Popen,
+    timeout_seconds: float = 30.0,
+) -> bool:
+    """
+    Wait for cloudflared to establish a connection to Cloudflare's edge.
+
+    This monitors cloudflared's stdout/stderr for connection success messages.
+    DNS records only resolve AFTER cloudflared has connected, so this function
+    must be called before attempting DNS verification.
+
+    Args:
+        proc: The cloudflared subprocess from open_managed_tunnel()
+        timeout_seconds: Maximum time to wait for connection
+
+    Returns:
+        True if connection was established, False if timeout or error
+
+    Raises:
+        RuntimeError: If cloudflared exits with an error before connecting
+    """
+    import select
+
+    # Patterns that indicate successful connection
+    # cloudflared outputs: "INF Registered tunnel connection connIndex=0 connection=..."
+    # We need to be specific - "connIndex=" alone triggers too early on curve preferences log
+    connection_patterns = [
+        "Registered tunnel connection",
+        "Connection registered",
+        # Don't use "connIndex=" alone - it matches curve preferences log before actual connection
+    ]
+
+    # Patterns that indicate fatal errors
+    error_patterns = [
+        "failed to connect",
+        "error connecting",
+        "tunnel credentials",
+        "invalid token",
+        "tunnel not found",
+        "unauthorized",
+    ]
+
+    loop = asyncio.get_event_loop()
+    start_time = loop.time()
+    output_lines: list[str] = []
+
+    logger.info(f"Waiting for cloudflared to connect (timeout {timeout_seconds}s)...")
+
+    while True:
+        elapsed = loop.time() - start_time
+        if elapsed >= timeout_seconds:
+            logger.warning(
+                f"cloudflared connection timeout after {elapsed:.1f}s. "
+                f"Output: {' | '.join(output_lines[-10:])}"
+            )
+            return False
+
+        # Check if process exited
+        if proc.poll() is not None:
+            # Process exited - read remaining output
+            remaining = proc.stdout.read() if proc.stdout else ""
+            if remaining:
+                output_lines.extend(remaining.splitlines())
+
+            all_output = "\n".join(output_lines)
+            logger.error(
+                f"cloudflared exited with code {proc.returncode} before connecting. "
+                f"Output:\n{all_output[:2000]}"
+            )
+            raise RuntimeError(
+                f"cloudflared exited with code {proc.returncode} before establishing connection. "
+                f"This usually means the tunnel token is invalid or the tunnel was deleted. "
+                f"Output: {all_output[:500]}"
+            )
+
+        # Try to read output (non-blocking)
+        if proc.stdout:
+            try:
+                # Use select for non-blocking read
+                ready, _, _ = select.select([proc.stdout], [], [], 0.1)
+                if ready:
+                    line = proc.stdout.readline()
+                    if line:
+                        line = line.strip()
+                        output_lines.append(line)
+                        logger.debug(f"cloudflared: {line}")
+
+                        # Check for connection success
+                        line_lower = line.lower()
+                        for pattern in connection_patterns:
+                            if pattern.lower() in line_lower:
+                                logger.info(
+                                    f"cloudflared connected after {elapsed:.1f}s: {line}"
+                                )
+                                return True
+
+                        # Check for fatal errors
+                        for pattern in error_patterns:
+                            if pattern.lower() in line_lower:
+                                logger.error(f"cloudflared error detected: {line}")
+                                raise RuntimeError(
+                                    f"cloudflared connection failed: {line}"
+                                )
+            except (ValueError, OSError) as e:
+                logger.debug(f"Error reading cloudflared output: {e}")
+
+        # Small sleep to avoid busy loop
+        await asyncio.sleep(0.1)
+
+
+async def open_managed_tunnel_with_connection_wait(
+    tunnel_token: str,
+    timeout_seconds: float = 30.0,
+) -> subprocess.Popen:
+    """
+    Open a managed tunnel and wait for cloudflared to connect.
+
+    This is the preferred method for starting managed tunnels as it ensures
+    cloudflared has actually connected to Cloudflare's edge before returning.
+    DNS records only resolve after this connection is established.
+
+    Args:
+        tunnel_token: Cloudflare tunnel token from backend API
+        timeout_seconds: Maximum time to wait for connection
+
+    Returns:
+        Process handle for the connected tunnel
+
+    Raises:
+        RuntimeError: If cloudflared fails to connect within timeout
+    """
+    proc = open_managed_tunnel(tunnel_token)
+
+    try:
+        connected = await wait_for_cloudflared_connection(proc, timeout_seconds)
+        if not connected:
+            # Timeout - kill process and raise
+            stop_tunnel(proc)
+            raise RuntimeError(
+                f"cloudflared failed to connect within {timeout_seconds}s. "
+                "The tunnel may be invalid or Cloudflare may be experiencing issues."
+            )
+        return proc
+    except Exception:
+        # Cleanup on any error
+        stop_tunnel(proc)
+        raise
+
+
 def stop_tunnel(proc: Optional[subprocess.Popen]) -> None:
     """
     Gracefully stop a tunnel process.
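Callers are expected to prefer the new connection-wait entry point over calling open_managed_tunnel directly. A minimal usage sketch, assuming these names are importable from synth_ai/core/integrations/cloudflare.py as listed in the files-changed table (the token value is a placeholder):

```python
import asyncio

from synth_ai.core.integrations.cloudflare import (
    open_managed_tunnel_with_connection_wait,
    stop_tunnel,
)

async def main() -> None:
    # Raises RuntimeError if cloudflared cannot connect within the timeout
    proc = await open_managed_tunnel_with_connection_wait(
        tunnel_token="<tunnel token from backend API>",
        timeout_seconds=30.0,
    )
    try:
        ...  # tunnel is connected; DNS verification can now succeed
    finally:
        stop_tunnel(proc)

asyncio.run(main())
```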
synth_ai/core/tracing_v3/abstractions.py CHANGED
@@ -32,6 +32,52 @@ Concepts:
     tool result back, and the agent sending a reply to the user. Do not confuse these with
     provider-specific LLM API "messages" (prompt formatting) — those belong inside an LMCAISEvent
     as part of its input/output content, not as SessionEventMessages.
+
+Example usage:
+
+```python
+from synth_ai.core.tracing_v3.abstractions import (
+    SessionTrace,
+    SessionTimeStep,
+    LMCAISEvent,
+    EnvironmentEvent,
+    TimeRecord,
+)
+import time
+
+# Create a simple trace with one turn
+trace = SessionTrace(
+    session_id="sess_example",
+    session_time_steps=[
+        SessionTimeStep(
+            step_id="turn_1",
+            step_index=0,
+            events=[
+                LMCAISEvent(
+                    system_instance_id="llm",
+                    time_record=TimeRecord(event_time=time.time()),
+                    model_name="gpt-4o",
+                    input_tokens=150,
+                    output_tokens=50,
+                ),
+                EnvironmentEvent(
+                    system_instance_id="tool_executor",
+                    time_record=TimeRecord(event_time=time.time()),
+                    reward=1.0,
+                    terminated=True,
+                ),
+            ],
+        )
+    ],
+)
+
+# Convert to dict for serialization
+trace_dict = trace.to_dict()
+```
+
+See Also:
+    - V3 Traces SDK guide: /sdk/tracing/v3-traces
+    - Event rewards: /sdk/tracing/rewards/event-rewards
 """
 
 from __future__ import annotations
synth_ai/data/__init__.py CHANGED
@@ -1,11 +1,11 @@
 """Synth AI Data Layer.
 
 This module provides pure data types with no IO dependencies.
-Contains actual data schemas for traces, rewards, and specs.
+Contains actual data schemas for traces and rewards.
 
 Data vs SDK distinction:
-- data/: Pure data records (traces, rewards, specs) - actual data
-- sdk/: API abstractions (jobs, training, judging) - SDK interfaces
+- data/: Pure data records (traces, rewards) - actual data
+- sdk/: API abstractions (jobs, training, graphs) - SDK interfaces
 
 Dependency rule: data/ imports nothing from synth_ai except typing helpers.
 """
@@ -16,13 +16,11 @@ from __future__ import annotations
 from synth_ai.data.enums import (
     AdaptiveBatchLevel,
     AdaptiveCurriculumLevel,
-    ContainerBackend,
     InferenceMode,
     JobStatus,
     JobType,
     PromptLearningMethod,
     ProviderName,
-    ResearchAgentAlgorithm,
     RewardSource,
     RLMethod,
     SFTMethod,
@@ -38,19 +36,6 @@ from synth_ai.data.rewards import (
     RewardRecord,
 )
 
-# Spec data types (re-exports)
-from synth_ai.data.specs import (
-    Constraints,
-    Example,
-    GlossaryItem,
-    Interfaces,
-    Metadata,
-    Principle,
-    Rule,
-    Spec,
-    TestCase,
-)
-
 # Trace data types (re-exports from tracing_v3)
 from synth_ai.data.traces import (
     BaseEvent,
@@ -71,8 +56,6 @@ __all__ = [
     "PromptLearningMethod",
     "RLMethod",
     "SFTMethod",
-    "ResearchAgentAlgorithm",
-    "ContainerBackend",
     "InferenceMode",
     "ProviderName",
     "RewardSource",
@@ -85,16 +68,6 @@ __all__ = [
     "RewardAggregates",
     "CalibrationExample",
     "GoldExample",
-    # Spec data
-    "Spec",
-    "Metadata",
-    "Principle",
-    "Rule",
-    "Constraints",
-    "Example",
-    "TestCase",
-    "Interfaces",
-    "GlossaryItem",
     # Trace data
     "SessionTrace",
     "SessionTimeStep",
synth_ai/data/enums.py CHANGED
@@ -54,22 +54,6 @@ class SFTMethod(str, Enum):
     QLORA = "qlora"
 
 
-class ResearchAgentAlgorithm(str, Enum):
-    """Research agent algorithms."""
-
-    SCAFFOLD_TUNING = "scaffold_tuning"
-    EVALUATION = "evaluation"
-    TRACE_ANALYSIS = "trace_analysis"
-
-
-class ContainerBackend(str, Enum):
-    """Container backends for research agent."""
-
-    DAYTONA = "daytona"
-    MODAL = "modal"
-    DOCKER = "docker"
-
-
 class InferenceMode(str, Enum):
     """Inference modes for policy evaluation."""
 
@@ -89,7 +73,7 @@ class RewardSource(str, Enum):
     """Source of reward signal for training."""
 
     TASK_APP = "task_app"
-    JUDGE = "judge"
+    VERIFIER = "verifier"
     FUSED = "fused"
 
 
@@ -128,8 +112,6 @@ __all__ = [
     "PromptLearningMethod",
     "RLMethod",
     "SFTMethod",
-    "ResearchAgentAlgorithm",
-    "ContainerBackend",
     "InferenceMode",
     "ProviderName",
     "RewardSource",
@@ -138,4 +120,3 @@ __all__ = [
    "SynthModelName",
    "SYNTH_MODEL_NAMES",
]
-
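Code that referenced the removed or renamed members needs a small migration. A minimal sketch (member and value names from the diff above):

```python
from synth_ai.data.enums import RewardSource

source = RewardSource.VERIFIER     # 0.4.4; was RewardSource.JUDGE in 0.4.1
assert source.value == "verifier"  # serialized value changed from "judge"

# ResearchAgentAlgorithm and ContainerBackend no longer exist in this module
```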
synth_ai/data/rewards.py CHANGED
@@ -2,6 +2,43 @@
 
 This module defines pure data types for representing rewards in training
 and evaluation contexts. These are actual data records, not API abstractions.
+
+Synth AI uses two primary reward scopes:
+
+- **Event Rewards**: Fine-grained rewards attached to individual events within a session
+  (e.g., each tool call, each LLM response). Use `EventRewardRecord` to annotate specific
+  events with reward values.
+
+- **Outcome Rewards**: Episode-level rewards that summarize the overall success of a
+  complete session. Use `OutcomeRewardRecord` for aggregate metrics.
+
+Example usage:
+
+```python
+from synth_ai.data.rewards import EventRewardRecord, OutcomeRewardRecord
+
+# Annotate a specific event with a reward
+event_reward = EventRewardRecord(
+    event_id="evt_123",
+    session_id="sess_abc",
+    reward_value=0.8,
+    reward_type="evaluator",
+    annotation={"reason": "Correct tool selection"}
+)
+
+# Record episode-level outcome
+outcome = OutcomeRewardRecord(
+    session_id="sess_abc",
+    total_reward=0.85,
+    achievements_count=3,
+    total_steps=10,
+    metadata={"task": "code_generation"}
+)
+```
+
+See Also:
+    - Event rewards SDK guide: /sdk/tracing/rewards/event-rewards
+    - Outcome rewards SDK guide: /sdk/tracing/rewards/outcome-rewards
 """
 
 from __future__ import annotations
@@ -17,6 +54,20 @@ class RewardRecord:
 
     Represents a reward signal at a specific point in a trajectory,
     with metadata about its source and scope.
+
+    Attributes:
+        value: The numeric reward value (typically in range [0, 1] or unbounded).
+        reward_type: Category of reward - "shaped" (dense), "sparse" (terminal only),
+            "achievement" (milestone), "penalty" (negative signal), "evaluator"
+            (from LLM verifier), or "human" (manual annotation).
+        scope: Granularity level - "step" (per action), "event" (per significant event),
+            or "outcome" (episode-level).
+        source: Origin of the reward - "environment" (task env), "runner" (framework),
+            "evaluator" (verifier), or "human" (annotator).
+        key: Optional identifier like achievement name or rubric criterion ID.
+        turn: Turn number within the session where reward was earned.
+        timestamp: When the reward was recorded.
+        metadata: Additional context (e.g., rubric scores, evaluation details).
     """
 
     value: float
@@ -34,7 +85,29 @@ class OutcomeRewardRecord:
     """Episode-level reward summary.
 
     Aggregates reward information for a complete episode/session,
-    including total reward, achievements, and step counts.
+    including total reward, achievements, and step counts. This is the
+    primary data structure for outcome rewards used in training.
+
+    Attributes:
+        session_id: Unique identifier linking to the SessionTrace.
+        total_reward: Aggregate reward for the entire episode (typically 0.0-1.0).
+        achievements_count: Number of achievements/milestones reached.
+        total_steps: Total number of steps in the episode.
+        metadata: Task-specific metadata (e.g., {"task": "code_gen", "difficulty": "hard"}).
+        annotation: Human or evaluator annotations explaining the score.
+        created_at: When this record was created.
+
+    Example:
+        ```python
+        outcome = OutcomeRewardRecord(
+            session_id="sess_abc123",
+            total_reward=0.75,
+            achievements_count=2,
+            total_steps=8,
+            metadata={"task": "customer_support"},
+            annotation={"evaluator": "Resolved issue but could improve tone"}
+        )
+        ```
     """
 
     session_id: str
@@ -51,7 +124,32 @@ class EventRewardRecord:
     """Event-level reward annotation.
 
     Links a reward to a specific event in a trace, with optional
-    annotations and source information.
+    annotations and source information. Event rewards provide fine-grained
+    feedback on individual actions or decisions within a session.
+
+    Attributes:
+        event_id: Unique identifier of the event being rewarded.
+        session_id: Session containing this event.
+        reward_value: Reward for this specific event (typically 0.0-1.0).
+        reward_type: Category of reward (e.g., "tool_success", "reasoning", "progress").
+        key: Rubric criterion or achievement key this reward relates to.
+        turn_number: Turn/step within the session where event occurred.
+        source: Origin of the reward ("environment", "evaluator", "human").
+        annotation: Explanation or details about why this reward was given.
+        created_at: When this record was created.
+
+    Example:
+        ```python
+        event_reward = EventRewardRecord(
+            event_id="evt_tool_call_5",
+            session_id="sess_abc123",
+            reward_value=1.0,
+            reward_type="tool_success",
+            turn_number=3,
+            source="environment",
+            annotation={"tool": "search", "result": "found_answer"}
+        )
+        ```
     """
 
     event_id: str
@@ -149,4 +247,3 @@ __all__ = [
    "CalibrationExample",
    "GoldExample",
]
-
synth_ai/products/graph_evolve/__init__.py CHANGED
@@ -4,7 +4,7 @@ This product provides tools for optimizing LLM-based workflow graphs
 using evolutionary algorithms. It can optimize both:
 
 - **Policy graphs**: Graphs that solve tasks (e.g., multi-hop QA, reasoning)
-- **Verifier graphs**: Graphs that judge/score existing results
+- **Verifier graphs**: Graphs that verify/score existing results
 
 Algorithms:
 - `graph_evolve`: Evolutionary optimization for graph structure
@@ -43,4 +43,3 @@ __all__ = [
    "ConversionWarning",
    "ConversionError",
]
-