synth-ai 0.2.8.dev12__py3-none-any.whl → 0.2.8.dev13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (36) hide show
  1. synth_ai/api/train/__init__.py +5 -0
  2. synth_ai/api/train/builders.py +165 -0
  3. synth_ai/api/train/cli.py +429 -0
  4. synth_ai/api/train/config_finder.py +120 -0
  5. synth_ai/api/train/env_resolver.py +302 -0
  6. synth_ai/api/train/pollers.py +66 -0
  7. synth_ai/api/train/task_app.py +128 -0
  8. synth_ai/api/train/utils.py +232 -0
  9. synth_ai/cli/__init__.py +23 -0
  10. synth_ai/cli/rl_demo.py +2 -2
  11. synth_ai/cli/root.py +2 -1
  12. synth_ai/cli/task_apps.py +520 -0
  13. synth_ai/task/__init__.py +94 -1
  14. synth_ai/task/apps/__init__.py +88 -0
  15. synth_ai/task/apps/grpo_crafter.py +438 -0
  16. synth_ai/task/apps/math_single_step.py +852 -0
  17. synth_ai/task/auth.py +132 -0
  18. synth_ai/task/client.py +148 -0
  19. synth_ai/task/contracts.py +29 -14
  20. synth_ai/task/datasets.py +105 -0
  21. synth_ai/task/errors.py +49 -0
  22. synth_ai/task/json.py +77 -0
  23. synth_ai/task/proxy.py +258 -0
  24. synth_ai/task/rubrics.py +212 -0
  25. synth_ai/task/server.py +398 -0
  26. synth_ai/task/tracing_utils.py +79 -0
  27. synth_ai/task/vendors.py +61 -0
  28. synth_ai/tracing_v3/session_tracer.py +13 -5
  29. synth_ai/tracing_v3/storage/base.py +10 -12
  30. synth_ai/tracing_v3/turso/manager.py +20 -6
  31. {synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.8.dev13.dist-info}/METADATA +3 -2
  32. {synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.8.dev13.dist-info}/RECORD +36 -14
  33. {synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.8.dev13.dist-info}/WHEEL +0 -0
  34. {synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.8.dev13.dist-info}/entry_points.txt +0 -0
  35. {synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.8.dev13.dist-info}/licenses/LICENSE +0 -0
  36. {synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.8.dev13.dist-info}/top_level.txt +0 -0
synth_ai/task/proxy.py ADDED
@@ -0,0 +1,258 @@
1
+ from __future__ import annotations
2
+
3
+ """Shared helpers for Task App proxy endpoints (OpenAI, Groq, etc.)."""
4
+
5
+ import copy
6
+ import json
7
+ import re
8
+ from typing import Any, Iterable, List, Tuple
9
+
10
+
11
# Tool schema advertised to the model: a single "interact" function that
# executes an ordered list of environment actions, with optional reasoning.
INTERACT_TOOL_SCHEMA: List[dict[str, Any]] = [
    {
        "type": "function",
        "function": {
            "name": "interact",
            "description": "Perform one or more environment actions.",
            "parameters": {
                "type": "object",
                "properties": {
                    "actions": {
                        "type": "array",
                        "items": {"type": "string"},
                        "description": "List of environment actions to execute in order.",
                    },
                    "reasoning": {
                        "type": "string",
                        "description": "Optional reasoning for the chosen actions.",
                    },
                },
                "required": ["actions"],
                "additionalProperties": False,
            },
        },
    }
]

# Task-App-internal request fields stripped before forwarding upstream.
_REMOVE_FIELDS = {
    "stop_after_tool_calls",
    "thinking_mode",
    "thinking_budget",
    "reasoning",
}
# Sampling knobs removed for gpt-5 family models.
_REMOVE_SAMPLING_FIELDS = {"temperature", "top_p"}
# Floor applied to max_completion_tokens for gpt-5 family models.
_GPT5_MIN_COMPLETION_TOKENS = 16000
45
+
46
+
47
def _ensure_tools(payload: dict[str, Any]) -> None:
    """Ensure *payload* carries a non-empty ``tools`` list, mutating in place.

    When ``tools`` is missing, not a list, or empty, install a deep copy of
    the default interact tool schema so later mutation cannot corrupt the
    module-level constant.
    """
    existing = payload.get("tools")
    if isinstance(existing, list) and existing:
        return
    payload["tools"] = copy.deepcopy(INTERACT_TOOL_SCHEMA)
51
+
52
+
53
def prepare_for_openai(model: str | None, payload: dict[str, Any]) -> dict[str, Any]:
    """Sanitise an OpenAI chat-completions payload for Task App usage.

    Returns a deep copy of *payload* with Task-App-internal fields removed.
    For gpt-5 family models it additionally:
      * migrates ``max_tokens`` into ``max_completion_tokens`` (an explicit
        ``max_completion_tokens`` from the caller takes precedence),
      * strips sampling fields (temperature / top_p),
      * enforces a minimum integer completion-token budget,
      * forces a single non-parallel ``interact`` tool call.
    Finally, the default interact tool schema is injected when no tools are
    present.
    """
    sanitized = copy.deepcopy(payload)
    for key in _REMOVE_FIELDS:
        sanitized.pop(key, None)

    if model and "gpt-5" in model:
        legacy_budget = sanitized.pop("max_tokens", None)
        if legacy_budget is not None:
            # Keep a caller-supplied max_completion_tokens; otherwise adopt
            # the legacy max_tokens value.
            sanitized.setdefault("max_completion_tokens", legacy_budget)
        for key in _REMOVE_SAMPLING_FIELDS:
            sanitized.pop(key, None)
        budget = sanitized.get("max_completion_tokens")
        if not isinstance(budget, int) or budget < _GPT5_MIN_COMPLETION_TOKENS:
            sanitized["max_completion_tokens"] = _GPT5_MIN_COMPLETION_TOKENS
        sanitized["tool_choice"] = {"type": "function", "function": {"name": "interact"}}
        sanitized["parallel_tool_calls"] = False

    _ensure_tools(sanitized)
    return sanitized
76
+
77
+
78
def prepare_for_groq(model: str | None, payload: dict[str, Any]) -> dict[str, Any]:
    """Normalise a payload for Groq, which speaks the OpenAI schema.

    Runs the OpenAI sanitisation first. For non-gpt-5 models, renames
    ``max_completion_tokens`` back to Groq's native ``max_tokens`` unless the
    caller already supplied ``max_tokens`` in the original payload.
    """
    sanitized = prepare_for_openai(model, payload)
    is_gpt5 = bool(model) and "gpt-5" in (model or "")
    if model and not is_gpt5:
        if "max_completion_tokens" in sanitized and "max_tokens" not in payload:
            sanitized["max_tokens"] = sanitized.pop("max_completion_tokens")
    return sanitized
87
+
88
+
89
def inject_system_hint(payload: dict[str, Any], hint: str) -> dict[str, Any]:
    """Return a copy of *payload* whose leading system message carries *hint*.

    Idempotent: when the first system message already contains the hint, the
    messages are left untouched. A payload without a well-formed ``messages``
    list is returned as a plain copy; an empty hint returns the original
    payload object unchanged.
    """
    if not hint:
        return payload
    cloned = copy.deepcopy(payload)
    messages = cloned.get("messages")
    if not isinstance(messages, list):
        return cloned
    head = messages[0] if messages else None
    if isinstance(head, dict) and head.get("role") == "system":
        body = head.get("content")
        if isinstance(body, str) and hint not in body:
            updated = dict(head)
            separator = "\n\n" if body else ""
            updated["content"] = body.rstrip() + separator + hint
            messages[0] = updated
    else:
        messages.insert(0, {"role": "system", "content": hint})
    cloned["messages"] = messages
    return cloned
107
+
108
+
109
def extract_message_text(message: Any) -> str:
    """Best-effort extraction of plain text from an OpenAI-style message.

    Accepts strings, lists of parts, and dicts carrying ``content`` (string
    or list of parts) or a string ``text`` field; anything unrecognised is
    stringified. ``None`` yields the empty string.
    """
    if message is None:
        return ""
    if isinstance(message, str):
        return message
    if isinstance(message, list):
        pieces = (extract_message_text(item) for item in message)
        return "\n".join(piece for piece in pieces if piece)
    if isinstance(message, dict):
        body = message.get("content")
        if isinstance(body, str):
            return body
        if isinstance(body, list):
            pieces = (extract_message_text(item) for item in body)
            return "\n".join(piece for piece in pieces if piece)
        text_field = message.get("text")
        if isinstance(text_field, str):
            return text_field
    return str(message)
133
+
134
+
135
def _parse_actions_from_json_candidate(candidate: Any) -> tuple[list[str], str]:
    """Extract (actions, reasoning) from a decoded JSON value.

    Accepts a mapping whose "actions" is a list (items stringified and
    stripped) or a semicolon-separated string; "reasoning" is taken only
    when it is a string. Any other input yields ([], "").
    """
    if not isinstance(candidate, dict):
        return [], ""
    raw_actions = candidate.get("actions")
    if isinstance(raw_actions, list):
        actions = [str(item).strip() for item in raw_actions if str(item).strip()]
    elif isinstance(raw_actions, str):
        actions = [piece.strip() for piece in raw_actions.split(";") if piece.strip()]
    else:
        actions = []
    raw_reason = candidate.get("reasoning")
    reasoning = raw_reason.strip() if isinstance(raw_reason, str) else ""
    return actions, reasoning
147
+
148
+
149
def parse_tool_call_from_text(text: str) -> Tuple[list[str], str]:
    """Derive interact-tool actions and reasoning from free-form assistant text.

    Tried in order: the whole payload as JSON, embedded ``{... actions ...}``
    JSON fragments, an inline "Actions: a, b" line (preceding text becomes
    the reasoning), and per-line "Action N: x" entries (remaining lines
    become the reasoning). Returns ([], text) when nothing actionable is
    found.
    """
    text = (text or "").strip()
    if not text:
        return [], ""

    # 1) The whole message may already be a JSON tool payload.
    try:
        whole = json.loads(text)
    except Exception:
        pass
    else:
        actions, reasoning = _parse_actions_from_json_candidate(whole)
        if actions:
            return actions, reasoning or text

    # 2) Scan for embedded JSON objects mentioning an "actions" field.
    for fragment in re.findall(r"\{[^{}]*actions[^{}]*\}", text, flags=re.IGNORECASE):
        try:
            candidate = json.loads(fragment)
        except Exception:
            continue
        actions, reasoning = _parse_actions_from_json_candidate(candidate)
        if actions:
            return actions, reasoning or text

    # 3) A single "Actions: a, b" line; text before it is the reasoning.
    inline = re.search(r"actions?\s*:\s*([^\n]+)", text, flags=re.IGNORECASE)
    if inline:
        items = [chunk.strip() for chunk in inline.group(1).split(",") if chunk.strip()]
        if items:
            return items, text[: inline.start()].strip()

    # 4) Line-by-line "Action 1: x" entries; other lines become reasoning.
    collected: list[str] = []
    commentary: list[str] = []
    for raw_line in text.splitlines():
        line = raw_line.strip()
        if not line:
            continue
        hit = re.match(r"action\s*\d*\s*[:\-]\s*(.+)", line, flags=re.IGNORECASE)
        if hit is None:
            commentary.append(line)
            continue
        candidate = hit.group(1).strip()
        if candidate:
            collected.append(candidate)
    if collected:
        return collected, "\n".join(commentary).strip()

    return [], text
203
+
204
+
205
def _build_tool_call(actions: Iterable[str], reasoning: str) -> dict[str, Any]:
    """Wrap *actions* (plus optional *reasoning*) as an OpenAI tool_call dict.

    Used as a fallback when the model answered in prose instead of calling
    the interact tool; the synthetic call carries a fixed id so downstream
    consumers can recognise it.
    """
    arguments: dict[str, Any] = {
        "actions": [str(item).strip() for item in actions if str(item).strip()],
    }
    cleaned_reason = reasoning.strip()
    if cleaned_reason:
        arguments["reasoning"] = cleaned_reason
    return {
        "id": "tool_interact_fallback",
        "type": "function",
        "function": {
            "name": INTERACT_TOOL_SCHEMA[0]["function"]["name"],
            "arguments": json.dumps(arguments, ensure_ascii=False),
        },
    }
219
+
220
+
221
def synthesize_tool_call_if_missing(openai_response: dict[str, Any]) -> dict[str, Any]:
    """Ensure the first choice carries a tool_call, deriving one from its text.

    When the first choice's message has no ``tool_calls``, parse its textual
    content for actions and attach a synthetic interact call. The input is
    returned unchanged when it is malformed, already has tool calls, or no
    actions can be parsed; otherwise a deep-copied response is returned.
    """
    if not isinstance(openai_response, dict):
        return openai_response
    choices = openai_response.get("choices")
    if not (isinstance(choices, list) and choices):
        return openai_response
    first = choices[0]
    if not isinstance(first, dict):
        return openai_response
    message = first.get("message")
    if not isinstance(message, dict):
        return openai_response
    existing_calls = message.get("tool_calls")
    if isinstance(existing_calls, list) and existing_calls:
        return openai_response

    actions, reasoning = parse_tool_call_from_text(extract_message_text(message))
    if not actions:
        return openai_response

    patched_message = copy.deepcopy(message)
    patched_message["tool_calls"] = [_build_tool_call(actions, reasoning)]
    patched_message.setdefault("content", None)

    patched_first = copy.deepcopy(first)
    patched_first["message"] = patched_message

    patched = copy.deepcopy(openai_response)
    patched["choices"] = [patched_first] + choices[1:]
    return patched
258
+
@@ -0,0 +1,212 @@
1
+ from __future__ import annotations
2
+
3
+ """Rubric schema, loading, and scoring helpers for Task Apps."""
4
+
5
+ import json
6
+ from pathlib import Path
7
+ from typing import Any, Dict, Iterable, Optional
8
+
9
+ from pydantic import BaseModel, Field, field_validator
10
+
11
+
12
class Criterion(BaseModel):
    """A single rubric criterion.

    Attributes:
        id: unique identifier within a rubric.
        description: human-readable statement of what is judged.
        weight: positive multiplier used by weighted aggregation (default 1.0).
        required: whether the criterion is mandatory (default False).
    """

    id: str
    description: str
    weight: float = 1.0
    required: bool = False

    @field_validator("weight")
    @classmethod
    def _check_weight_positive(cls, value: float) -> float:
        """Reject zero or negative weights."""
        if value <= 0:
            raise ValueError("criterion weight must be positive")
        return value
24
+
25
+
26
class Rubric(BaseModel):
    """A versioned set of criteria plus an aggregation strategy.

    Attributes:
        version: rubric schema/content version string.
        goal_text: optional overall goal description.
        criteria: list of Criterion entries with unique ids.
        aggregation: one of "sum", "weighted_sum", "custom", "inherit".
    """

    version: str
    goal_text: str | None = None
    criteria: list[Criterion] = Field(default_factory=list)
    aggregation: str = "weighted_sum"

    @field_validator("aggregation")
    @classmethod
    def _check_aggregation(cls, value: str) -> str:
        """Restrict aggregation to the supported strategies."""
        allowed = {"sum", "weighted_sum", "custom", "inherit"}
        if value not in allowed:
            raise ValueError(f"aggregation must be one of {sorted(allowed)}")
        return value

    @field_validator("criteria")
    @classmethod
    def _check_unique_ids(cls, criteria: list[Criterion]) -> list[Criterion]:
        """Criterion ids must be unique within a rubric."""
        seen: set[str] = set()
        for item in criteria:
            if item.id in seen:
                raise ValueError(f"duplicate criterion id: {item.id}")
            seen.add(item.id)
        return criteria
49
+
50
+
51
def _load_text(source: str) -> tuple[str, Optional[str]]:
    """Return ``(text, suffix)`` for *source*.

    If *source* names an existing file, return its contents and its
    lowercase suffix; otherwise treat *source* itself as inline rubric text
    and return it with a ``None`` suffix.

    The filesystem probe is guarded: arbitrary inline rubric text can be an
    invalid path (embedded NUL bytes raise ValueError on older Pythons;
    over-long names can raise OSError on some platforms), and such input
    must fall through to the inline-text case instead of crashing.
    """
    try:
        path = Path(source)
        if path.exists():
            return path.read_text(encoding="utf-8"), path.suffix.lower()
    except (OSError, ValueError):
        # Not a usable filesystem path -- treat as inline text below.
        pass
    return source, None
56
+
57
+
58
def _parse_structured(text: str, suffix: Optional[str]) -> Dict[str, Any]:
    """Parse rubric *text* into a mapping.

    Resolution order:
      1. ``.yaml``/``.yml`` suffix -> YAML (requires PyYAML).
      2. ``http(s)://`` source -> fetched with ``requests`` and re-parsed.
      3. Explicit JSON object (``{...}``).
      4. Best-effort JSON, falling back to YAML.

    Every branch now enforces a mapping result: previously the JSON paths
    could return a list or scalar (e.g. input "[1, 2]" or "null") despite
    the declared ``Dict`` return type, producing confusing downstream
    validation errors.

    Raises:
        ValueError: empty text or a decode that is not a mapping.
        RuntimeError: PyYAML needed but not installed.
    """
    text = text.strip()
    if not text:
        raise ValueError("Rubric source is empty")
    if suffix in (".yaml", ".yml"):
        try:
            import yaml  # type: ignore
        except Exception as exc:  # pragma: no cover - optional dependency
            raise RuntimeError("PyYAML is required to load YAML rubrics") from exc
        data = yaml.safe_load(text)
        if not isinstance(data, dict):
            raise ValueError("Rubric YAML must produce a mapping")
        return data
    if text.startswith("http://") or text.startswith("https://"):
        import requests  # type: ignore

        response = requests.get(text, timeout=15)
        response.raise_for_status()
        return _parse_structured(response.text, suffix)
    if text.startswith("{"):
        data = json.loads(text)
        if not isinstance(data, dict):
            raise ValueError("Rubric JSON must decode to a mapping")
        return data
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        try:
            import yaml  # type: ignore
        except Exception as exc:  # pragma: no cover - optional dependency
            raise RuntimeError("PyYAML is required to load rubric text") from exc
        data = yaml.safe_load(text)
    if not isinstance(data, dict):
        raise ValueError("Rubric text must decode to a mapping")
    return data
90
+
91
+
92
def load_rubric(source: str | dict[str, Any] | Rubric | None) -> Rubric | None:
    """Coerce *source* into a Rubric.

    Accepts None (returned as-is), an existing Rubric instance, a mapping,
    or a string (file path, URL, or inline JSON/YAML text) resolved via
    ``_load_text`` / ``_parse_structured``.
    """
    if source is None:
        return None
    if isinstance(source, Rubric):
        return source
    if isinstance(source, dict):
        return Rubric.model_validate(source)
    raw_text, suffix = _load_text(str(source))
    return Rubric.model_validate(_parse_structured(raw_text, suffix))
102
+
103
+
104
def _merge_weights(base: Criterion, override: Criterion) -> float:
    """Combine base/override criterion weights; 1.0 means "not customised".

    Both customised -> multiply them; only the override customised -> the
    override wins; otherwise keep the base weight.
    """
    base_custom = base.weight != 1.0
    override_custom = override.weight != 1.0
    if base_custom and override_custom:
        return base.weight * override.weight
    if override_custom:
        return override.weight
    return base.weight
110
+
111
+
112
def blend_rubrics(base: Rubric | None, override: Rubric | None) -> Rubric | None:
    """Merge an override rubric on top of a base rubric.

    Criteria present in both are combined: the override's non-empty
    description wins, weights are merged via ``_merge_weights``. Override
    criteria come first (in override order), followed by base-only criteria.
    An "inherit" aggregation on the override falls back to the base's.
    Either argument being None short-circuits to the other.
    """
    if base is None and override is None:
        return None
    if base is None:
        return override
    if override is None:
        return base

    remaining = {criterion.id: criterion for criterion in base.criteria}
    combined: list[Criterion] = []
    for ov in override.criteria:
        counterpart = remaining.pop(ov.id, None)
        if counterpart is None:
            combined.append(ov)
            continue
        combined.append(
            Criterion(
                id=ov.id,
                description=ov.description or counterpart.description,
                weight=_merge_weights(counterpart, ov),
                # NOTE(review): Criterion.required is a plain bool defaulting
                # to False, so it is never None here and the override always
                # wins -- confirm whether inheriting base.required was intended.
                required=ov.required if ov.required is not None else counterpart.required,
            )
        )
    combined.extend(remaining.values())

    aggregation = base.aggregation if override.aggregation == "inherit" else override.aggregation
    return Rubric(
        version=override.version or base.version,
        goal_text=override.goal_text or base.goal_text,
        criteria=combined,
        aggregation=aggregation,
    )
149
+
150
+
151
def _as_float(value: Any) -> Optional[float]:
    """Return *value* coerced to float, or None when coercion fails."""
    try:
        result = float(value)
    except Exception:
        return None
    return result
156
+
157
+
158
def _score(criteria: Iterable[Criterion], values: Dict[str, float], aggregation: str) -> Dict[str, Any]:
    """Aggregate per-criterion scores.

    Criteria missing from *values* score 0.0. "sum" adds raw scores;
    "weighted_sum" produces a weight-normalised average; "custom" reports a
    None total (the caller supplies its own aggregate); "inherit" is treated
    as "weighted_sum". Returns a dict with "aggregation", "score", and a
    per-criterion breakdown (score / weight / required).
    """
    mode = "weighted_sum" if aggregation == "inherit" else aggregation
    breakdown: Dict[str, Dict[str, Any]] = {}
    raw_total = 0.0
    weight_sum = 0.0
    for criterion in criteria:
        value = values.get(criterion.id, 0.0)
        breakdown[criterion.id] = {
            "score": value,
            "weight": criterion.weight,
            "required": criterion.required,
        }
        if mode == "sum":
            raw_total += value
        elif mode == "weighted_sum":
            raw_total += value * criterion.weight
            weight_sum += criterion.weight
    total: Any = raw_total
    if mode == "weighted_sum" and weight_sum > 0:
        total = raw_total / weight_sum
    if mode == "custom":
        total = None
    return {
        "aggregation": mode,
        "score": total,
        "per_criterion": breakdown,
    }
185
+
186
+
187
def score_events_against_rubric(events: list[dict[str, Any]], rubric: Rubric | None) -> Dict[str, Any]:
    """Score a list of event dicts against *rubric*.

    Each event supplies a criterion id (via "criterion_id", "id", or
    "criterion") and a numeric "score"; later events overwrite earlier ones
    for the same criterion, and non-dict events or non-numeric scores are
    skipped. With no rubric, a neutral "none" result is returned.
    """
    if rubric is None:
        return {"aggregation": "none", "score": None, "per_criterion": {}}
    collected: Dict[str, float] = {}
    for event in events or []:
        if not isinstance(event, dict):
            continue
        key = event.get("criterion_id") or event.get("id") or event.get("criterion")
        value = _as_float(event.get("score"))
        if key and value is not None:
            collected[str(key)] = value
    return _score(rubric.criteria, collected, rubric.aggregation)
199
+
200
+
201
def score_outcome_against_rubric(outcome: dict[str, Any], rubric: Rubric | None) -> Dict[str, Any]:
    """Score an outcome payload against *rubric*.

    Per-criterion scores are read from ``outcome["criteria"]`` when that is
    a mapping, otherwise from the outcome mapping itself; values that do not
    coerce to float are skipped. With no rubric, a neutral "none" result is
    returned.
    """
    if rubric is None:
        return {"aggregation": "none", "score": None, "per_criterion": {}}
    collected: Dict[str, float] = {}
    if isinstance(outcome, dict):
        inner = outcome.get("criteria")
        source = inner if isinstance(inner, dict) else outcome
        for key, raw in source.items():
            value = _as_float(raw)
            if value is not None:
                collected[str(key)] = value
    return _score(rubric.criteria, collected, rubric.aggregation)