synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of synth-ai might be problematic.

Files changed (48)
  1. examples/agora_ex/README_MoE.md +224 -0
  2. examples/agora_ex/__init__.py +7 -0
  3. examples/agora_ex/agora_ex.py +65 -0
  4. examples/agora_ex/agora_ex_task_app.py +590 -0
  5. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
  6. examples/agora_ex/reward_fn_grpo-human.py +129 -0
  7. examples/agora_ex/system_prompt_CURRENT.md +63 -0
  8. examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
  9. examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
  10. examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
  11. examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
  12. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
  13. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
  14. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
  15. examples/multi_step/crafter_rl_lora.md +51 -10
  16. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  17. examples/multi_step/task_app_config_notes.md +7 -1
  18. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
  19. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
  20. examples/warming_up_to_rl/run_eval.py +127 -18
  21. examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
  22. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
  23. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
  24. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +232 -193
  25. synth_ai/__init__.py +41 -1
  26. synth_ai/api/train/builders.py +49 -19
  27. synth_ai/api/train/configs/__init__.py +44 -0
  28. synth_ai/api/train/configs/rl.py +133 -0
  29. synth_ai/api/train/configs/sft.py +94 -0
  30. synth_ai/api/train/configs/shared.py +24 -0
  31. synth_ai/cli/demo.py +38 -39
  32. synth_ai/cli/rl_demo.py +81 -102
  33. synth_ai/cli/task_apps.py +3 -0
  34. synth_ai/demos/core/cli.py +121 -159
  35. synth_ai/environments/examples/crafter_classic/environment.py +16 -0
  36. synth_ai/evals/__init__.py +15 -0
  37. synth_ai/evals/client.py +85 -0
  38. synth_ai/evals/types.py +42 -0
  39. synth_ai/judge_schemas.py +127 -0
  40. synth_ai/rubrics/__init__.py +22 -0
  41. synth_ai/rubrics/validators.py +126 -0
  42. synth_ai/tracing_v3/serialization.py +130 -0
  43. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +1 -1
  44. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +48 -22
  45. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
  46. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
  47. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
  48. {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py:

@@ -2,7 +2,10 @@ from __future__ import annotations
 
 import asyncio
 import contextlib
+import json
 import logging
+import os
+import time
 from typing import Any
 
 import httpx

@@ -23,9 +26,15 @@ class OpenAIClient:
         self.api_key = api_key
         self.timeout_s = timeout_s
         self.headers = {}
-
-        if api_key:
-            self.headers["Authorization"] = f"Bearer {api_key}"
+        # If we're calling back into our own task app proxy (e.g., /proxy/groq),
+        # the FastAPI app still enforces X-API-Key. Include it when available so
+        # intra-app proxy calls authenticate correctly.
+        try:
+            env_key = os.getenv("ENVIRONMENT_API_KEY")
+            if env_key and isinstance(env_key, str):
+                self.headers.setdefault("X-API-Key", env_key)
+        except Exception:
+            pass
 
     def _fix_model_parameters(
         self, request: dict[str, Any], target_url: str | None = None
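
For context on the new X-API-Key seeding: the task app's FastAPI layer authenticates callers against ENVIRONMENT_API_KEY, so requests that loop back through the app's own proxy must carry that header. A minimal sketch of such a guard, assuming a plain header-equality check (the function name and details here are illustrative, not synth-ai's actual implementation):

import os

from fastapi import Header, HTTPException

def require_env_api_key(x_api_key: str | None = Header(default=None)) -> None:
    # Reject intra-app proxy calls that lack the expected X-API-Key header.
    expected = os.getenv("ENVIRONMENT_API_KEY")
    if not expected or x_api_key != expected:
        raise HTTPException(status_code=401, detail="missing or invalid X-API-Key")
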
@@ -52,6 +61,8 @@ class OpenAIClient:
                 or ("azure" in low and ".openai." in low)
                 or ("groq.com" in low)
                 or ("/openai" in low)
+                or ("/proxy/groq" in low)
+                or ("/proxy/openai" in low)
             )
         except Exception:
             is_openai = False
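
The two added clauses widen the endpoint heuristic so traffic routed through the task app's own proxy paths also receives the OpenAI-style parameter fixes. An abbreviated, standalone restatement of the predicate (omitting the Azure clause; not the library's exact code):

def looks_openai_compatible(url: str) -> bool:
    # Substring heuristic mirroring the predicate in the hunk above.
    low = url.lower()
    return (
        "groq.com" in low
        or "/openai" in low
        or "/proxy/groq" in low
        or "/proxy/openai" in low
    )

assert looks_openai_compatible("https://task-app.example/proxy/groq")
assert not looks_openai_compatible("https://example.com/v1/other")
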
@@ -137,13 +148,53 @@ class OpenAIClient:
         Returns:
             OpenAI-compatible chat completion response
         """
-        url = (base_url or self.base_url).rstrip("/") + "/v1/chat/completions"
+        base = (base_url or self.base_url).rstrip("/")
+        url = base + "/v1/chat/completions"
         timeout = timeout_s or self.timeout_s
 
         # Merge headers
         headers = self.headers.copy()
         if extra_headers:
             headers.update(extra_headers)
+        # Always include X-API-Key for intra-app requests
+        try:
+            envk = os.getenv("ENVIRONMENT_API_KEY")
+            if envk and isinstance(envk, str):
+                headers["X-API-Key"] = envk
+        except Exception:
+            pass
+
+        # If target is our in-app Groq proxy, force Authorization to use GROQ_API_KEY
+        try:
+            low_url = (url or "").lower()
+            if "/proxy/groq" in low_url or "groq" in low_url:
+                gk = os.getenv("GROQ_API_KEY")
+                if gk and isinstance(gk, str):
+                    headers["Authorization"] = f"Bearer {gk}"
+        except Exception:
+            pass
+
+        # In-process proxy path: avoid HTTP round-trip and auth dependency
+        try:
+            if base.endswith("/proxy/groq") or base.endswith("/proxy/groq/"):
+                from synth_ai.task.server import prepare_for_groq, inject_system_hint
+                # Prepare payload similar to server-side proxy
+                model = request.get("model") if isinstance(request.get("model"), str) else None
+                payload = prepare_for_groq(model, request)
+                payload = inject_system_hint(payload, "")
+                # Call vendor directly
+                gk = os.getenv("GROQ_API_KEY") or ""
+                async with httpx.AsyncClient(timeout=timeout) as client:
+                    resp = await client.post(
+                        "https://api.groq.com/openai/v1/chat/completions",
+                        json=payload,
+                        headers={"Authorization": f"Bearer {gk}"},
+                    )
+                    resp.raise_for_status()
+                    return resp.json()
+        except Exception as _local_proxy_err:
+            # Do NOT fall back silently; surface the error so callers fail fast
+            raise
 
         # Fix parameter compatibility for newer models
         processed_request = self._fix_model_parameters(request, target_url=url)
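
The in-process branch deserves a note: when the configured base URL is the app's own /proxy/groq route, the client now posts straight to api.groq.com instead of making an HTTP hop back into the task app, and any failure is re-raised so callers fail fast rather than silently falling through to the generic path. The routing decision in isolation (illustrative sketch only):

def routes_directly_to_groq(base_url: str) -> bool:
    # A base URL ending in /proxy/groq is short-circuited to a direct Groq call;
    # the trailing-slash variant is handled because callers may not strip it.
    base = base_url.rstrip("/")
    return base.endswith("/proxy/groq")

assert routes_directly_to_groq("https://task-app.example/proxy/groq/")
assert not routes_directly_to_groq("https://api.groq.com/openai")
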
@@ -227,11 +278,7 @@ class OpenAIClient:
                 logger.info(
                     f"Inference response status=200, content-type={content_type}, bytes={len(body_text)}"
                 )
-                if body_text:
-                    preview_len = min(800, len(body_text))
-                    logger.info(
-                        f"Inference response preview ({preview_len} bytes): {body_text[:preview_len]}"
-                    )
+                # Do not log prompt or full response body
 
                 result = response.json()
                 logger.info(f"Inference response parsed_type={type(result).__name__}")
@@ -243,34 +290,10 @@ class OpenAIClient:
         except httpx.HTTPStatusError as e:
             status = e.response.status_code if e.response is not None else None
             text = e.response.text if e.response is not None else str(e)
-            # Log full body for debugging remote failures
-            try:
-                logger.error(
-                    {
-                        "openai_http_error": True,
-                        "status": status,
-                        "url": url,
-                        "body": text,
-                    }
-                )
-            except Exception:
-                logger.error(f"HTTP error from {url}: {status} - {text}")
+            # Log minimal error info only
+            logger.error({"openai_http_error": True, "status": status})
             # For 4xx/5xx, print full sanitized request to aid debugging (especially Groq 400s)
-            try:
-                redacted_headers = dict(headers)
-                if "Authorization" in redacted_headers:
-                    redacted_headers["Authorization"] = "***REDACTED***"
-                logger.error(
-                    {
-                        "request_debug": True,
-                        "status": status,
-                        "target": url,
-                        "headers": redacted_headers,
-                        "payload": processed_request,
-                    }
-                )
-            except Exception:
-                pass
+            # Suppress prompt/payload logging entirely
             # Special case: token budget exceeded (OpenAI-compatible error schema)
             try:
                 if status == 400 and e.response is not None:
@@ -324,8 +347,6 @@ class OpenAIClient:
                             logger.warning(
                                 {
                                     "token_budget_recovery": True,
-                                    "messages_tokens": messages_tokens,
-                                    "model_limit": model_limit,
                                     "retry_max_tokens": new_max,
                                 }
                             )
@@ -348,13 +369,8 @@ class OpenAIClient:
                 try:
                     err = e.response.json()
                 except Exception:
-                    err = {"error": "unprocessable", "detail": (text or "")[:200]}
-                logger.warning(
-                    {
-                        "inference_422_recovered": True,
-                        "detail": err,
-                    }
-                )
+                    err = {"error": "unprocessable"}
+                logger.warning({"inference_422_recovered": True})
             except Exception:
                 pass
             # Return a minimal OpenAI-compatible response with no tool_calls/content
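
For reference, the "minimal OpenAI-compatible response" the 422 recovery path returns would look roughly like the following; only the shape (no tool_calls, empty content) is implied by the diff, and the field values shown are assumptions:

empty_completion = {
    "id": "chatcmpl-recovered-422",
    "object": "chat.completion",
    "choices": [
        {
            "index": 0,
            # No tool_calls and empty content, so callers see a no-op turn.
            "message": {"role": "assistant", "content": ""},
            "finish_reason": "stop",
        }
    ],
}
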
@@ -471,6 +487,54 @@ class OpenAIClient:
                         f"Inference service overloaded (400). {response_data} Retrying after {wait_time}s..."
                     )
                 else:
+                    error_block = response_data.get("error")
+                    error_code = ""
+                    if isinstance(error_block, dict):
+                        error_code = str(
+                            error_block.get("code") or error_block.get("type") or ""
+                        ).lower()
+                    if error_code in {"tool_use_failed", "tool_call_failed"}:
+                        logger.warning(
+                            {
+                                "tool_use_failed": True,
+                                "target": (base_url or self.base_url),
+                                "message": error_block.get("message") if isinstance(error_block, dict) else None,
+                            }
+                        )
+                        fallback_actions = ["move_right", "move_up", "do"]
+                        fallback_response = {
+                            "id": f"fallback-{int(time.time() * 1000)}",
+                            "object": "chat.completion",
+                            "created": int(time.time()),
+                            "model": processed_request.get("model"),
+                            "choices": [
+                                {
+                                    "index": 0,
+                                    "message": {
+                                        "role": "assistant",
+                                        "content": "",
+                                        "tool_calls": [
+                                            {
+                                                "id": f"call_fallback_{int(time.time() * 1000)}",
+                                                "type": "function",
+                                                "function": {
+                                                    "name": "interact_many",
+                                                    "arguments": json.dumps(
+                                                        {"actions": fallback_actions}
+                                                    ),
+                                                },
+                                            }
+                                        ],
+                                    },
+                                    "finish_reason": "tool_calls",
+                                }
+                            ],
+                        }
+                        if isinstance(response_data.get("usage"), dict):
+                            fallback_response["usage"] = response_data["usage"]
+                        if isinstance(error_block, dict):
+                            fallback_response["error"] = error_block
+                        return fallback_response
                     # This is a different type of 400 error, don't retry
                     try:
                         redacted_headers = {}
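
Because the fallback payload is shaped like an ordinary chat completion carrying a well-formed interact_many tool call, downstream rollout code can keep stepping without special-casing the vendor error. A hypothetical consumer-side extraction (not part of the diff):

import json

def extract_actions(response: dict) -> list[str]:
    # Pull the action list back out of the (possibly fallback) tool call.
    call = response["choices"][0]["message"]["tool_calls"][0]
    args = json.loads(call["function"]["arguments"])
    return args.get("actions", [])
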
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py:

@@ -9,6 +9,8 @@ from typing import Any
 from fastapi import APIRouter, HTTPException, Request
 from pydantic import BaseModel
 
+from synth_ai.task.auth import allowed_environment_api_keys, normalize_environment_api_key
+
 from .envs.crafter.policy import CrafterPolicy
 from .inference.openai_client import create_inference_client
 from .registry import registry
@@ -435,34 +437,33 @@ async def step_policy(
                 elif role == "user":
                     user_prompt_records.append(record)
 
+            last_user_chars = (
+                len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0
+            )
             logger.info(
-                "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d",
+                "PROMPTS: system_msgs=%d user_msgs=%d last_user_chars=%d (content suppressed)",
                 len(system_prompt_records),
                 len(user_prompt_records),
-                len(user_prompt_records[-1].get("text", "")) if user_prompt_records else 0,
+                last_user_chars,
             )
 
-            if system_prompt_records:
-                logger.info("PROMPT_DUMP_SYSTEM_BEGIN")
-                for idx, rec in enumerate(system_prompt_records):
-                    smsg = rec.get("text", "")
-                    logger.info(f"SYSTEM[{idx}]\n{smsg}")
-                logger.info("PROMPT_DUMP_SYSTEM_END")
-
-            if user_prompt_records:
-                logger.info("PROMPT_DUMP_USER_BEGIN")
-                for idx, rec in enumerate(user_prompt_records):
-                    umsg = rec.get("text", "")
-                    logger.info(f"USER[{idx}]\n{umsg}")
-                logger.info("PROMPT_DUMP_USER_END")
-            # Print concise preview for visibility in standard logs
-            with contextlib.suppress(Exception):
-                last_user = (
-                    user_prompt_records[-1].get("text", "")
-                    if user_prompt_records
-                    else ""
-                )
-                print(f"[task:crafter] user prompt: {last_user}", flush=True)
+            log_prompt_details = (
+                os.getenv("CRAFT_LOG_PROMPTS", "").strip().lower()
+                in {"1", "true", "yes", "debug"}
+            )
+            if log_prompt_details:
+                if system_prompt_records:
+                    logger.info("PROMPT_DETAILS_SYSTEM_BEGIN")
+                    for idx, rec in enumerate(system_prompt_records):
+                        smsg = rec.get("text", "")
+                        logger.info("SYSTEM[%d]: %s", idx, smsg)
+                    logger.info("PROMPT_DETAILS_SYSTEM_END")
+                if user_prompt_records:
+                    logger.info("PROMPT_DETAILS_USER_BEGIN")
+                    for idx, rec in enumerate(user_prompt_records):
+                        umsg = rec.get("text", "")
+                        logger.info("USER[%d]: %s", idx, umsg)
+                    logger.info("PROMPT_DETAILS_USER_END")
         except Exception as e:
             logger.warning(f"PROMPT_DUMP_FAILED: {e}")
 
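Prompt bodies are now opt-in via the CRAFT_LOG_PROMPTS flag (name and accepted truthy values "1", "true", "yes", "debug" taken from the diff). For a local debugging session it could be enabled before the task app starts, for example:

import os

# Re-enable verbose prompt logging for a local debug run only; leave unset in
# production so prompt contents stay out of the logs.
os.environ["CRAFT_LOG_PROMPTS"] = "1"
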
@@ -524,15 +525,29 @@ async def step_policy(
             masked = "<masked>"
             logger.debug(f"INFERENCE_AUTH: Using bearer key {masked}")
         else:
-            logger.warning(
-                "INFERENCE_AUTH: No API key resolved for inference request; downstream may 401"
+            logger.debug(
+                "INFERENCE_AUTH: No bearer key resolved for inference request (expected when using in-app proxy)"
             )
 
         client = create_inference_client(task_app, api_key=api_key_override)
 
-        # Add policy identification header for observability
+        # Add policy identification header and task auth for proxy fallback
         policy_name = getattr(policy, "name", "") or type(policy).__name__.lower()
         extra_headers = {"X-Policy-Name": policy_name}
+        try:
+            env_key = normalize_environment_api_key()
+            if not env_key:
+                allowed_keys = allowed_environment_api_keys()
+                if allowed_keys:
+                    env_key = next(iter(sorted(allowed_keys)))
+            if isinstance(env_key, str) and env_key:
+                extra_headers["X-API-Key"] = env_key
+            else:
+                logger.warning(
+                    "INFERENCE_AUTH: Failed to resolve ENVIRONMENT_API_KEY for proxy request headers"
+                )
+        except Exception as exc:
+            logger.warning(f"INFERENCE_AUTH: Error resolving ENVIRONMENT_API_KEY: {exc}")
 
         # Apply input truncation to avoid 422 from inference server
         try:
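
The resolution order added here prefers the normalized ENVIRONMENT_API_KEY and only then falls back to the first of the allowed keys, sorted so the pick is deterministic across processes. Distilled into a standalone sketch (the helper names come from the diff's import; the rest is illustrative):

def resolve_proxy_key(normalized: str | None, allowed: set[str]) -> str | None:
    # Mirror of the fallback order above: explicit key first, then a
    # deterministic pick from the allowed set, else None (logged as a warning).
    if normalized:
        return normalized
    if allowed:
        return next(iter(sorted(allowed)))
    return None
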
@@ -761,26 +776,7 @@ async def step_policy(
         }
 
         # Emit the exact prompt/messages and tools before calling the LLM (bounded preview)
-        with contextlib.suppress(Exception):
-            req_dump = meta.get("inference_request", {})
-            msgs = req_dump.get("messages")
-            tools_dump = req_dump.get("tools")
-            if isinstance(msgs, list):
-                # Print compact messages structure and tool schema with bounded length
-                import json as _json
-
-                msgs_compact = _json.dumps(msgs)[:20000]
-                tools_compact = (
-                    _json.dumps(tools_dump)[:8000] if tools_dump is not None else None
-                )
-                print(
-                    {
-                        "llm.call": True,
-                        "policy": str(policy_name),
-                        "messages_preview": msgs_compact,
-                        "tools_preview": tools_compact,
-                    }
-                )
+        # Do not print prompts; only log response content later
 
         # Normalize request for non-OpenAI endpoints (strict schemas)
         with contextlib.suppress(Exception):