PyPI - synth-ai - Versions diffs - 0.2.8.dev11__py3-none-any.whl → 0.2.8.dev13__py3-none-any.whl - Mend

synth-ai 0.2.8.dev11__py3-none-any.whl → 0.2.8.dev13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (37)

synth_ai/api/train/__init__.py +5 -0
synth_ai/api/train/builders.py +165 -0
synth_ai/api/train/cli.py +429 -0
synth_ai/api/train/config_finder.py +120 -0
synth_ai/api/train/env_resolver.py +302 -0
synth_ai/api/train/pollers.py +66 -0
synth_ai/api/train/task_app.py +128 -0
synth_ai/api/train/utils.py +232 -0
synth_ai/cli/__init__.py +23 -0
synth_ai/cli/rl_demo.py +2 -2
synth_ai/cli/root.py +2 -1
synth_ai/cli/task_apps.py +520 -0
synth_ai/demos/demo_task_apps/math/modal_task_app.py +31 -25
synth_ai/task/__init__.py +94 -1
synth_ai/task/apps/__init__.py +88 -0
synth_ai/task/apps/grpo_crafter.py +438 -0
synth_ai/task/apps/math_single_step.py +852 -0
synth_ai/task/auth.py +132 -0
synth_ai/task/client.py +148 -0
synth_ai/task/contracts.py +29 -14
synth_ai/task/datasets.py +105 -0
synth_ai/task/errors.py +49 -0
synth_ai/task/json.py +77 -0
synth_ai/task/proxy.py +258 -0
synth_ai/task/rubrics.py +212 -0
synth_ai/task/server.py +398 -0
synth_ai/task/tracing_utils.py +79 -0
synth_ai/task/vendors.py +61 -0
synth_ai/tracing_v3/session_tracer.py +13 -5
synth_ai/tracing_v3/storage/base.py +10 -12
synth_ai/tracing_v3/turso/manager.py +20 -6
{synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/METADATA +3 -2
{synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/RECORD +37 -15
{synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/WHEEL +0 -0
{synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.8.dev11.dist-info → synth_ai-0.2.8.dev13.dist-info}/top_level.txt +0 -0

synth_ai/api/train/utils.py ADDED Viewed

@@ -0,0 +1,232 @@
+from __future__ import annotations
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterable, Mapping
+import requests
+import tomllib
+# Directory three levels above the one containing this file
+# (this file lives at synth_ai/api/train/utils.py, so this is the directory
+# that holds the ``synth_ai`` package).
+# NOTE(review): presumably the repository root in a source checkout; for an
+# installed wheel it would be the install location instead - confirm.
+REPO_ROOT = Path(__file__).resolve().parents[3]
+class TrainError(RuntimeError):
+    """Raised for interactive CLI failures.
+
+    Helpers in this module raise it for user-facing problems such as a
+    missing or unparsable TOML config, a missing dataset, or a dataset that
+    fails validation.
+    """
+def load_toml(path: Path) -> dict[str, Any]:
+    """Parse the TOML file at *path* and return its contents as a dict.
+
+    Raises:
+        TrainError: if the file does not exist or is not valid TOML.
+    """
+    try:
+        with path.open("rb") as stream:
+            parsed = tomllib.load(stream)
+    except tomllib.TOMLDecodeError as exc:  # pragma: no cover - malformed input
+        raise TrainError(f"Failed to parse TOML: {path}\n{exc}") from exc
+    except FileNotFoundError as exc:  # pragma: no cover - guarded by CLI
+        raise TrainError(f"Config not found: {path}") from exc
+    return parsed
+def mask_value(value: str | None) -> str:
+    """Return a redacted form of *value* that is safe to print.
+
+    Falsy input yields ``"<unset>"``; values of six characters or fewer are
+    fully hidden as ``"****"``; longer values keep only their first four and
+    last two characters around an ellipsis.
+    """
+    if value:
+        text = str(value)
+        if len(text) > 6:
+            head, tail = text[:4], text[-2:]
+            return f"{head}…{tail}"
+        return "****"
+    return "<unset>"
+# Matches ``KEY=value`` lines, optionally prefixed with ``export``; the key is
+# limited to ASCII letters, digits and underscores.
+_ENV_LINE = re.compile(r"^\s*(?:export\s+)?(?P<key>[A-Za-z0-9_]+)\s*=\s*(?P<value>.*)$")
+def read_env_file(path: Path) -> dict[str, str]:
+    """Read ``KEY=value`` pairs from the env file at *path*.
+
+    A missing file yields an empty dict. Lines that do not look like an
+    assignment are ignored, values are whitespace-stripped, and one pair of
+    matching surrounding quotes (single or double) is removed. When a key is
+    repeated, the last occurrence wins.
+    """
+    parsed: dict[str, str] = {}
+    if not path.exists():
+        return parsed
+    text = path.read_text(encoding="utf-8", errors="ignore")
+    for entry in text.splitlines():
+        match = _ENV_LINE.match(entry)
+        if match is None:
+            continue
+        value = match.group("value").strip()
+        # Only unwrap when both ends carry the same quote character.
+        is_quoted = len(value) >= 2 and value[0] == value[-1] and value[0] in {'"', "'"}
+        parsed[match.group("key")] = value[1:-1] if is_quoted else value
+    return parsed
+def write_env_value(path: Path, key: str, value: str) -> None:
+    """Set ``key=value`` in the env file at *path*, creating the file if needed.
+
+    Every existing assignment line for *key* is replaced in place; if there is
+    none, the assignment is appended. All other lines are kept as they are and
+    the file is rewritten with a trailing newline.
+    """
+    assignment = f"{key}={value}"
+    if path.exists():
+        current = path.read_text(encoding="utf-8", errors="ignore").splitlines()
+    else:
+        current = []
+    def _assigns_key(entry: str) -> bool:
+        match = _ENV_LINE.match(entry)
+        return match is not None and match.group("key") == key
+    hits = [_assigns_key(entry) for entry in current]
+    rewritten: list[str] = [assignment if hit else entry for entry, hit in zip(current, hits)]
+    if not any(hits):
+        rewritten.append(assignment)
+    path.write_text("\n".join(rewritten) + "\n", encoding="utf-8")
+@dataclass(slots=True)
+class CLIResult:
+    """Captured outcome of a subprocess, as built by ``run_cli``."""
+    # Exit status of the process (``returncode``).
+    code: int
+    # Captured standard output; ``run_cli`` strips surrounding whitespace.
+    stdout: str
+    # Captured standard error; ``run_cli`` strips surrounding whitespace.
+    stderr: str
+def run_cli(
+    args: Iterable[str],
+    *,
+    cwd: Path | None = None,
+    env: Mapping[str, str] | None = None,
+    timeout: float | None = None,
+) -> CLIResult:
+    """Run a command and capture its exit code and text output.
+
+    The child inherits the current environment with *env* layered on top.
+    A non-zero exit code is returned in the result rather than raised.
+    ``subprocess.run`` enforces *timeout*.
+    """
+    command = list(args)
+    merged_env = {**os.environ, **(env or {})}
+    completed = subprocess.run(
+        command,
+        cwd=cwd,
+        env=merged_env,
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+    )
+    out = completed.stdout.strip()
+    err = completed.stderr.strip()
+    return CLIResult(code=completed.returncode, stdout=out, stderr=err)
+def http_post(
+    url: str,
+    *,
+    headers: Mapping[str, str] | None = None,
+    json_body: Any | None = None,
+    timeout: float = 60.0,
+) -> requests.Response:
+    """POST *json_body* as JSON to *url* and return the raw response.
+
+    The response status is not checked here; callers inspect it themselves.
+    """
+    request_headers = dict(headers or {})
+    return requests.post(url, headers=request_headers, json=json_body, timeout=timeout)
+def http_get(
+    url: str,
+    *,
+    headers: Mapping[str, str] | None = None,
+    timeout: float = 30.0,
+) -> requests.Response:
+    """GET *url* and return the raw response.
+
+    The response status is not checked here; callers inspect it themselves.
+    """
+    request_headers = dict(headers or {})
+    return requests.get(url, headers=request_headers, timeout=timeout)
+def post_multipart(
+    url: str,
+    *,
+    api_key: str,
+    file_field: str,
+    file_path: Path,
+    purpose: str = "fine-tune",
+) -> requests.Response:
+    """Upload *file_path* to *url* as a multipart form with bearer auth.
+
+    The whole file is read into memory and sent under *file_field* with the
+    ``application/jsonl`` content type, alongside a ``purpose`` form field.
+    The request uses a fixed 300-second timeout and the response status is
+    not checked here.
+    """
+    auth_headers = {"Authorization": f"Bearer {api_key}"}
+    payload = file_path.read_bytes()
+    upload = {file_field: (file_path.name, payload, "application/jsonl")}
+    form_fields = {"purpose": purpose}
+    return requests.post(url, headers=auth_headers, files=upload, data=form_fields, timeout=300)
+def fmt_duration(seconds: float) -> str:
+    """Format a duration in seconds as a short human-readable string.
+
+    Under a minute: ``"12.3s"``; under an hour: ``"4m05s"``; otherwise
+    ``"2h07m"``. Minute and hour components are truncated, not rounded.
+    """
+    if seconds < 60:
+        return f"{seconds:.1f}s"
+    whole_minutes, leftover_secs = divmod(seconds, 60)
+    if whole_minutes < 60:
+        return f"{int(whole_minutes)}m{int(leftover_secs):02d}s"
+    whole_hours, leftover_mins = divmod(whole_minutes, 60)
+    return f"{int(whole_hours)}h{int(leftover_mins):02d}m"
+def _sft_record_problem(record: Any) -> str | None:
+    """Return a description of the first schema problem in *record*, or None if it is valid."""
+    # A JSONL line can be valid JSON without being an object (e.g. a list or
+    # a bare string); report that instead of failing on ``.get`` below.
+    if not isinstance(record, dict):
+        return "record is not a JSON object"
+    messages = record.get("messages")
+    if not isinstance(messages, list) or not messages:
+        return "missing or empty 'messages' list"
+    for msg_idx, msg in enumerate(messages):
+        if not isinstance(msg, dict):
+            return f"message {msg_idx} is not an object"
+        if "role" not in msg or "content" not in msg:
+            return f"message {msg_idx} missing 'role' or 'content'"
+        if not isinstance(msg["role"], str) or not isinstance(msg["content"], str):
+            return f"message {msg_idx} has non-string role/content"
+    return None
+def validate_sft_jsonl(path: Path, *, max_errors: int = 20) -> None:
+    """Check that every non-blank line of *path* is a well-formed SFT record.
+
+    Each record must be a JSON object with a non-empty ``messages`` list whose
+    items are objects carrying string ``role`` and ``content`` values. At most
+    one problem is reported per line, and scanning stops once *max_errors*
+    problems have been collected.
+
+    Args:
+        path: JSONL dataset to validate.
+        max_errors: Maximum number of problems to collect before stopping.
+
+    Raises:
+        TrainError: if the file is missing or any record is invalid; the
+            message lists the collected problems with their line numbers.
+    """
+    errors: list[str] = []
+    try:
+        fh = path.open("r", encoding="utf-8")
+    except FileNotFoundError as exc:  # pragma: no cover - upstream ensures existence
+        raise TrainError(f"Dataset not found: {path}") from exc
+    with fh:
+        for idx, line in enumerate(fh, start=1):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            try:
+                record = json.loads(stripped)
+            except json.JSONDecodeError as exc:
+                problem = f"invalid JSON ({exc.msg})"
+            else:
+                problem = _sft_record_problem(record)
+            if problem is None:
+                continue
+            errors.append(f"Line {idx}: {problem}")
+            if len(errors) >= max_errors:
+                break
+    if errors:
+        suffix = "" if len(errors) < max_errors else f" (showing first {max_errors} issues)"
+        details = "\n - ".join(errors)
+        raise TrainError(f"Dataset validation failed{suffix}:\n - {details}")
+def limit_jsonl_examples(src: Path, limit: int) -> Path:
+    """Copy the first *limit* non-blank lines of *src* into a new temp file.
+
+    The subset is written to ``<stem>.head<limit><suffix>`` inside a freshly
+    created ``sft_subset_*`` temporary directory. The caller owns the returned
+    file and its directory.
+
+    Args:
+        src: Source JSONL dataset.
+        limit: Maximum number of non-blank lines to keep; must be positive.
+
+    Returns:
+        Path of the subset file.
+
+    Raises:
+        TrainError: if *limit* is not positive, *src* does not exist, or
+            *src* contains no non-blank lines.
+    """
+    if limit <= 0:
+        raise TrainError("Example limit must be positive")
+    if not src.exists():
+        raise TrainError(f"Dataset not found: {src}")
+    tmp_dir = Path(tempfile.mkdtemp(prefix="sft_subset_"))
+    dest = tmp_dir / f"{src.stem}.head{limit}{src.suffix}"
+    written = 0
+    try:
+        with src.open("r", encoding="utf-8") as fin, dest.open("w", encoding="utf-8") as fout:
+            for line in fin:
+                if not line.strip():
+                    continue
+                fout.write(line)
+                written += 1
+                if written >= limit:
+                    break
+        if written == 0:
+            raise TrainError("Subset dataset is empty; check limit value")
+    except BaseException:
+        # Do not leave an empty or partial subset and its temp directory
+        # behind when we fail; cleanup is best-effort so the original
+        # error is the one that propagates.
+        try:
+            dest.unlink(missing_ok=True)
+            tmp_dir.rmdir()
+        except OSError:
+            pass
+        raise
+    return dest
+def ensure_api_base(base: str) -> str:
+    """Return *base* without trailing slashes and ending in ``/api``.
+
+    ``/api`` is appended only when the trimmed URL does not already end
+    with it.
+    """
+    trimmed = base.rstrip("/")
+    return trimmed if trimmed.endswith("/api") else f"{trimmed}/api"
+def preview_json(data: Any, limit: int = 600) -> str:
+    """Return at most *limit* characters of *data* rendered for display.
+
+    The value is pretty-printed as JSON with a two-space indent; if that
+    fails for any reason, ``str(data)`` is used instead.
+    """
+    try:
+        rendered = json.dumps(data, indent=2)
+        return rendered[:limit]
+    except Exception:
+        # Deliberate best-effort: a preview must never raise on odd payloads.
+        fallback = str(data)
+        return fallback[:limit]
+def sleep(seconds: float) -> None:
+    """Block the calling thread for *seconds* by delegating to ``time.sleep``."""
+    # NOTE(review): presumably wrapped so callers and tests have one place to
+    # patch - confirm against usage.
+    time.sleep(seconds)
+# Public API of this module, kept in sorted order.
+__all__ = [
+    "CLIResult",
+    "REPO_ROOT",
+    "TrainError",
+    "ensure_api_base",
+    "fmt_duration",
+    "http_get",
+    "http_post",
+    "limit_jsonl_examples",
+    "load_toml",
+    "mask_value",
+    "post_multipart",
+    "preview_json",
+    "read_env_file",
+    "run_cli",
+    "sleep",
+    "validate_sft_jsonl",
+    "write_env_value",
+]

synth_ai/cli/__init__.py CHANGED Viewed

@@ -75,3 +75,26 @@ try:
     _rl_demo.register(cli)
 except Exception:
     pass
+# Register the training commands; any failure while importing or registering
+# is swallowed so the rest of the CLI still loads.
+try:
+    from synth_ai.api.train import register as _train_register
+    _train_register(cli)
+except Exception:
+    pass
+# Mount the task-app command group as `task-app`.
+# NOTE(review): unlike the surrounding registrations this import is not
+# guarded, so a failure importing `.task_apps` propagates out of this module.
+from .task_apps import task_app_group
+cli.add_command(task_app_group, name="task-app")
+# Best-effort call to the task_apps module's own `register` hook.
+try:
+    from . import task_apps as _task_apps
+    _task_apps.register(cli)
+except Exception:
+    pass
+# Also expose three task-app subcommands as top-level commands.
+# NOTE(review): presumably `commands` is a dict, in which case a missing
+# subcommand raises KeyError here at import time - confirm.
+cli.add_command(task_app_group.commands['serve'], name='serve')
+cli.add_command(task_app_group.commands['deploy'], name='deploy')
+cli.add_command(task_app_group.commands['modal-serve'], name='modal-serve')

synth_ai/cli/rl_demo.py CHANGED Viewed

@@ -161,12 +161,12 @@ def register(cli):
         _forward(args)
     # Top-level convenience alias: `synth-ai deploy`
-    @cli.command("deploy")
+    @cli.command("demo-deploy")
     @click.option("--local", is_flag=True, help="Run local FastAPI instead of Modal deploy")
     @click.option("--app", type=click.Path(), default=None, help="Path to Modal app.py for uv run modal deploy")
     @click.option("--name", type=str, default="synth-math-demo", help="Modal app name")
     @click.option("--script", type=click.Path(), default=None, help="Path to deploy_task_app.sh (optional legacy)")
-    def deploy_top(local: bool, app: str | None, name: str, script: str | None):
+    def deploy_demo(local: bool, app: str | None, name: str, script: str | None):
         args: list[str] = ["rl_demo.deploy"]
         if local:
             args.append("--local")

synth_ai/cli/root.py CHANGED Viewed

@@ -164,7 +164,7 @@ def setup():
     default=True,
     help="Kill any process already bound to --env-port without prompting",
 )
-def serve(
+def serve_deprecated(
     db_file: str,
     sqld_port: int,
     env_port: int,
@@ -174,6 +174,7 @@ def serve(
     force: bool,
 ):
     logging.basicConfig(level=logging.INFO, format="%(message)s")
+    click.echo("⚠️  'synth-ai serve' now targets task apps; use 'synth-ai serve' for task apps or 'synth-ai serve-deprecated' for this legacy service.", err=True)
     processes = []
     def signal_handler(sig, frame):

synth-ai 0.2.8.dev11__py3-none-any.whl → 0.2.8.dev13__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.8.dev11__py3-none-any.whl → 0.2.8.dev13__py3-none-any.whl