PyPI - synth-ai - Versions diffs - 0.2.8.dev12__py3-none-any.whl → 0.2.9.dev0__py3-none-any.whl - Mend

synth-ai 0.2.8.dev12__py3-none-any.whl → 0.2.9.dev0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

synth_ai/api/train/__init__.py +5 -0
synth_ai/api/train/builders.py +165 -0
synth_ai/api/train/cli.py +450 -0
synth_ai/api/train/config_finder.py +168 -0
synth_ai/api/train/env_resolver.py +302 -0
synth_ai/api/train/pollers.py +66 -0
synth_ai/api/train/task_app.py +193 -0
synth_ai/api/train/utils.py +232 -0
synth_ai/cli/__init__.py +23 -0
synth_ai/cli/rl_demo.py +18 -6
synth_ai/cli/root.py +38 -6
synth_ai/cli/task_apps.py +1107 -0
synth_ai/demo_registry.py +258 -0
synth_ai/demos/core/cli.py +147 -111
synth_ai/demos/demo_task_apps/__init__.py +7 -1
synth_ai/demos/demo_task_apps/math/config.toml +55 -110
synth_ai/demos/demo_task_apps/math/modal_task_app.py +157 -21
synth_ai/demos/demo_task_apps/math/task_app_entry.py +39 -0
synth_ai/task/__init__.py +94 -1
synth_ai/task/apps/__init__.py +88 -0
synth_ai/task/apps/grpo_crafter.py +438 -0
synth_ai/task/apps/math_single_step.py +852 -0
synth_ai/task/auth.py +153 -0
synth_ai/task/client.py +165 -0
synth_ai/task/contracts.py +29 -14
synth_ai/task/datasets.py +105 -0
synth_ai/task/errors.py +49 -0
synth_ai/task/json.py +77 -0
synth_ai/task/proxy.py +258 -0
synth_ai/task/rubrics.py +212 -0
synth_ai/task/server.py +398 -0
synth_ai/task/tracing_utils.py +79 -0
synth_ai/task/vendors.py +61 -0
synth_ai/tracing_v3/session_tracer.py +13 -5
synth_ai/tracing_v3/storage/base.py +10 -12
synth_ai/tracing_v3/turso/manager.py +20 -6
{synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.9.dev0.dist-info}/METADATA +3 -2
{synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.9.dev0.dist-info}/RECORD +42 -18
{synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.9.dev0.dist-info}/WHEEL +0 -0
{synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.9.dev0.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.9.dev0.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.8.dev12.dist-info → synth_ai-0.2.9.dev0.dist-info}/top_level.txt +0 -0

synth_ai/api/train/task_app.py ADDED Viewed

@@ -0,0 +1,193 @@
+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from typing import Iterable
+import click
+import requests
+from .utils import CLIResult, http_get, run_cli
+@dataclass(slots=True)
+class TaskAppHealth:
+    """Outcome of probing a task app's `/health` and `/task_info` endpoints."""
+    # True only when both the /health and /task_info probes were judged successful.
+    ok: bool
+    # HTTP status code of the /health response, or None when no response was obtained.
+    health_status: int | None
+    # HTTP status code of the /task_info response, or None when no response was obtained.
+    task_info_status: int | None
+    # Comma-separated, human-readable summary of both probes.
+    detail: str | None = None
+def _health_response_ok(resp: requests.Response | None) -> tuple[bool, str]:
+    """Decide whether a `/health` response counts as healthy.
+
+    Args:
+        resp: The response to inspect, or None when no response was obtained.
+
+    Returns:
+        A ``(ok, note)`` pair. A 200 yields ``(True, "")``. A 401/403 whose JSON
+        body has a truthy ``expected_api_key_prefix`` field, or whose ``detail``
+        mentions "expected prefix", yields ``(True, "auth-optional...")``.
+        Everything else yields ``(False, "")``.
+    """
+    if resp is None:
+        return False, ""
+    status = resp.status_code
+    if status == 200:
+        return True, ""
+    if status not in {401, 403}:
+        return False, ""
+    try:
+        payload = resp.json()
+    except ValueError:
+        payload = {}
+    # A JSON body that is not an object (list, string, null, ...) has no fields
+    # to inspect; treat it as unhealthy instead of failing on ``.get``.
+    if not isinstance(payload, dict):
+        return False, ""
+    prefix = payload.get("expected_api_key_prefix")
+    detail = str(payload.get("detail", ""))
+    if prefix or "expected prefix" in detail.lower():
+        note = "auth-optional"
+        if prefix:
+            note += f" (expected-prefix={prefix})"
+        return True, note
+    return False, ""
+def check_task_app_health(base_url: str, api_key: str, *, timeout: float = 10.0) -> TaskAppHealth:
+    """Probe a task app's `/health` and `/task_info` endpoints.
+
+    Args:
+        base_url: Root URL of the task app; trailing slashes are stripped.
+        api_key: Key sent as `X-API-Key` and as a bearer `Authorization` header.
+        timeout: Timeout passed to each `http_get` call.
+
+    Returns:
+        A `TaskAppHealth` whose `ok` is true only when both probes succeeded,
+        carrying the HTTP status of each probe (None when the request raised)
+        and a comma-separated `detail` summary.
+
+    A `requests.RequestException` from either probe is recorded in `detail`
+    rather than raised.
+    """
+    # Send ALL known environment keys so the server can authorize any valid one
+    import os
+    headers = {"X-API-Key": api_key}
+    # Extra keys come from the comma-separated ENVIRONMENT_API_KEY_ALIASES variable.
+    aliases = (os.getenv("ENVIRONMENT_API_KEY_ALIASES") or "").strip()
+    keys: list[str] = [api_key]
+    if aliases:
+        keys.extend([p.strip() for p in aliases.split(",") if p.strip()])
+    # `keys` always contains `api_key`, so this branch always runs.
+    if keys:
+        headers["X-API-Keys"] = ",".join(keys)
+        headers.setdefault("Authorization", f"Bearer {api_key}")
+    base = base_url.rstrip("/")
+    detail_parts: list[str] = []
+    health_resp: requests.Response | None = None
+    health_ok = False
+    # --- Probe 1: /health ---
+    try:
+        health_resp = http_get(f"{base}/health", headers=headers, timeout=timeout)
+        health_ok, note = _health_response_ok(health_resp)
+        suffix = f" ({note})" if note else ""
+        # On non-200, include brief JSON detail if present
+        if not health_ok and health_resp is not None:
+            try:
+                hjs = health_resp.json()
+                # pull a few helpful fields without dumping everything
+                expected = hjs.get("expected_api_key_prefix")
+                authorized = hjs.get("authorized")
+                detail = hjs.get("detail")
+                extras = []
+                if authorized is not None:
+                    extras.append(f"authorized={authorized}")
+                if expected:
+                    extras.append(f"expected_prefix={expected}")
+                if detail:
+                    extras.append(f"detail={str(detail)[:80]}")
+                if extras:
+                    suffix += " [" + ", ".join(extras) + "]"
+            except Exception:
+                # Best effort: a body that is not JSON (or not an object) adds no extras.
+                pass
+        detail_parts.append(f"/health={health_resp.status_code}{suffix}")
+    except requests.RequestException as exc:
+        detail_parts.append(f"/health_error={exc}")
+    task_resp: requests.Response | None = None
+    task_ok = False
+    # --- Probe 2: /task_info (only a 200 counts as success) ---
+    try:
+        task_resp = http_get(f"{base}/task_info", headers=headers, timeout=timeout)
+        task_ok = bool(task_resp.status_code == 200)
+        if not task_ok and task_resp is not None:
+            try:
+                tjs = task_resp.json()
+                msg = tjs.get("detail") or tjs.get("status")
+                detail_parts.append(f"/task_info={task_resp.status_code} ({str(msg)[:80]})")
+            except Exception:
+                # Fall back to the bare status when the body cannot be inspected.
+                detail_parts.append(f"/task_info={task_resp.status_code}")
+        else:
+            detail_parts.append(f"/task_info={task_resp.status_code}")
+    except requests.RequestException as exc:
+        detail_parts.append(f"/task_info_error={exc}")
+    ok = bool(health_ok and task_ok)
+    detail = ", ".join(detail_parts)
+    return TaskAppHealth(
+        ok=ok,
+        health_status=None if health_resp is None else health_resp.status_code,
+        task_info_status=None if task_resp is None else task_resp.status_code,
+        detail=detail,
+    )
+@dataclass(slots=True)
+class ModalSecret:
+    """A name/value pair; presumably describes a Modal secret (verify against callers)."""
+    name: str
+    value: str
+@dataclass(slots=True)
+class ModalApp:
+    """One row parsed from `modal app list` output by `list_modal_apps`."""
+    # First whitespace-separated column of the row.
+    app_id: str
+    # Second whitespace-separated column of the row.
+    label: str
+    # Last whitespace-separated column of the row.
+    url: str
+def _run_modal(args: Iterable[str]) -> CLIResult:
+    """Run the `modal` executable with *args* via `run_cli`, using a 30.0 timeout."""
+    command = ["modal"]
+    command.extend(args)
+    return run_cli(command, timeout=30.0)
+def list_modal_secrets(pattern: str | None = None) -> list[str]:
+    """Return secret names parsed from `modal secret list` output.
+
+    The first whitespace-separated token of every non-empty line that does not
+    start with "NAME" is taken as a name. When *pattern* is given, only names
+    containing it (case-insensitively) are kept.
+
+    Raises:
+        click.ClickException: If the command exits with a non-zero code.
+    """
+    outcome = _run_modal(["secret", "list"])
+    if outcome.code != 0:
+        raise click.ClickException(f"modal secret list failed: {outcome.stderr or outcome.stdout}")
+    needle = pattern.lower() if pattern else None
+    found: list[str] = []
+    for raw_line in outcome.stdout.splitlines():
+        row = raw_line.strip()
+        if not row or row.startswith("NAME"):
+            continue
+        candidate = row.split()[0]
+        if needle is None or needle in candidate.lower():
+            found.append(candidate)
+    return found
+def get_modal_secret_value(name: str) -> str:
+    """Return the stripped stdout of `modal secret get <name>`.
+
+    Raises:
+        click.ClickException: If the command exits non-zero or prints nothing.
+    """
+    outcome = _run_modal(["secret", "get", name])
+    if outcome.code != 0:
+        raise click.ClickException(f"modal secret get {name} failed: {outcome.stderr or outcome.stdout}")
+    secret = outcome.stdout.strip()
+    if secret:
+        return secret
+    raise click.ClickException(f"Secret {name} is empty")
+def list_modal_apps(pattern: str | None = None) -> list[ModalApp]:
+    """Return apps parsed from `modal app list` output.
+
+    Each non-empty line that does not start with "APP" and has at least three
+    whitespace-separated columns becomes a `ModalApp` (first column = app id,
+    second = label, last = url). When *pattern* is given, an app is kept only
+    if the pattern occurs (case-insensitively) inside its label, url, or app id.
+
+    Raises:
+        click.ClickException: If the command exits with a non-zero code.
+    """
+    result = _run_modal(["app", "list"])
+    if result.code != 0:
+        raise click.ClickException(f"modal app list failed: {result.stderr or result.stdout}")
+    needle = pattern.lower() if pattern else None
+    apps: list[ModalApp] = []
+    for line in result.stdout.splitlines():
+        line = line.strip()
+        if not line or line.startswith("APP"):
+            continue
+        parts = line.split()
+        if len(parts) < 3:
+            continue
+        app_id, label, url = parts[0], parts[1], parts[-1]
+        # Match each field on its own: searching the concatenation would let a
+        # pattern match across the boundary between two unrelated fields.
+        if needle is not None and not any(needle in field.lower() for field in (label, url, app_id)):
+            continue
+        apps.append(ModalApp(app_id=app_id, label=label, url=url))
+    return apps
+def format_modal_apps(apps: list[ModalApp]) -> str:
+    """Render *apps* as a 1-based numbered list, one "N) label url" line per app."""
+    return "\n".join(
+        f"{position}) {entry.label} {entry.url}"
+        for position, entry in enumerate(apps, start=1)
+    )
+def format_modal_secrets(names: list[str]) -> str:
+    """Render *names* as a 1-based numbered list, one "N) name" line per entry."""
+    numbered: list[str] = []
+    for position, secret_name in enumerate(names, start=1):
+        numbered.append(f"{position}) {secret_name}")
+    return "\n".join(numbered)
+# Public API of this module. TaskAppHealth is exported because it is the
+# return type of check_task_app_health.
+__all__ = [
+    "ModalApp",
+    "ModalSecret",
+    "TaskAppHealth",
+    "check_task_app_health",
+    "format_modal_apps",
+    "format_modal_secrets",
+    "get_modal_secret_value",
+    "list_modal_apps",
+    "list_modal_secrets",
+]

synth_ai/api/train/utils.py ADDED Viewed

@@ -0,0 +1,232 @@
+from __future__ import annotations
+import json
+import os
+import re
+import subprocess
+import sys
+import tempfile
+import time
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Any, Iterable, Mapping
+import requests
+import tomllib
+REPO_ROOT = Path(__file__).resolve().parents[3]
+class TrainError(RuntimeError):
+    """Raised for interactive CLI failures.
+
+    Helpers in this module raise it for missing or unparsable config files and
+    for missing, invalid, or empty datasets.
+    """
+def load_toml(path: Path) -> dict[str, Any]:
+    """Parse the TOML file at *path* and return its contents as a dict.
+
+    Raises:
+        TrainError: If the file does not exist or is not valid TOML.
+    """
+    try:
+        with path.open("rb") as stream:
+            parsed = tomllib.load(stream)
+    except FileNotFoundError as missing:  # pragma: no cover - guarded by CLI
+        raise TrainError(f"Config not found: {path}") from missing
+    except tomllib.TOMLDecodeError as malformed:  # pragma: no cover - malformed input
+        raise TrainError(f"Failed to parse TOML: {path}\n{malformed}") from malformed
+    return parsed
+def mask_value(value: str | None) -> str:
+    """Return a redacted form of *value* that is safe to display.
+
+    Falsy input gives "<unset>"; strings of up to six characters give "****";
+    longer strings keep their first four and last two characters around "…".
+    """
+    if not value:
+        return "<unset>"
+    text = str(value)
+    return "****" if len(text) <= 6 else f"{text[:4]}…{text[-2:]}"
+_ENV_LINE = re.compile(r"^\s*(?:export\s+)?(?P<key>[A-Za-z0-9_]+)\s*=\s*(?P<value>.*)$")
+def read_env_file(path: Path) -> dict[str, str]:
+    """Read `KEY=value` assignments from the env file at *path*.
+
+    Lines not matching `_ENV_LINE` are skipped. Values are stripped, and a
+    single pair of matching surrounding quotes (single or double) is removed.
+    A missing file yields an empty dict; a later duplicate key wins.
+    """
+    parsed: dict[str, str] = {}
+    if not path.exists():
+        return parsed
+    text = path.read_text(encoding="utf-8", errors="ignore")
+    for entry in text.splitlines():
+        hit = _ENV_LINE.match(entry)
+        if hit is None:
+            continue
+        val = hit.group("value").strip()
+        quoted = len(val) >= 2 and val[0] in {'"', "'"} and val[0] == val[-1]
+        parsed[hit.group("key")] = val[1:-1] if quoted else val
+    return parsed
+def write_env_value(path: Path, key: str, value: str) -> None:
+    """Set `key=value` in the env file at *path*, creating the file if needed.
+
+    Every existing line that assigns *key* is replaced by the new assignment;
+    if there is none, the assignment is appended. All other lines are kept,
+    and the file is rewritten with a trailing newline.
+    """
+    assignment = f"{key}={value}"
+    current: list[str] = []
+    if path.exists():
+        current = path.read_text(encoding="utf-8", errors="ignore").splitlines()
+    rewritten: list[str] = []
+    replaced = False
+    for entry in current:
+        hit = _ENV_LINE.match(entry)
+        if hit is not None and hit.group("key") == key:
+            rewritten.append(assignment)
+            replaced = True
+            continue
+        rewritten.append(entry)
+    if not replaced:
+        rewritten.append(assignment)
+    path.write_text("\n".join(rewritten) + "\n", encoding="utf-8")
+@dataclass(slots=True)
+class CLIResult:
+    """Captured outcome of a subprocess started by `run_cli`."""
+    # Process return code.
+    code: int
+    # Captured standard output, stripped of surrounding whitespace.
+    stdout: str
+    # Captured standard error, stripped of surrounding whitespace.
+    stderr: str
+def run_cli(args: Iterable[str], *, cwd: Path | None = None, env: Mapping[str, str] | None = None, timeout: float | None = None) -> CLIResult:
+    """Run a command and capture its result.
+
+    Args:
+        args: Command and arguments, passed to `subprocess.run` as a list.
+        cwd: Working directory for the child process.
+        env: Extra variables layered on top of the current `os.environ`.
+        timeout: Timeout forwarded to `subprocess.run`.
+
+    Returns:
+        A `CLIResult` with the return code and stripped stdout/stderr text.
+    """
+    merged_env = dict(os.environ)
+    if env:
+        merged_env.update(env)
+    completed = subprocess.run(
+        list(args),
+        cwd=cwd,
+        env=merged_env,
+        capture_output=True,
+        text=True,
+        timeout=timeout,
+    )
+    out_text = completed.stdout.strip()
+    err_text = completed.stderr.strip()
+    return CLIResult(code=completed.returncode, stdout=out_text, stderr=err_text)
+def http_post(url: str, *, headers: Mapping[str, str] | None = None, json_body: Any | None = None, timeout: float = 60.0) -> requests.Response:
+    """POST *json_body* as JSON to *url* and return the raw response.
+
+    *headers* is copied into a fresh dict (empty when None) before sending.
+    """
+    return requests.post(url, headers=dict(headers or {}), json=json_body, timeout=timeout)
+def http_get(url: str, *, headers: Mapping[str, str] | None = None, timeout: float = 30.0) -> requests.Response:
+    """GET *url* and return the raw response.
+
+    *headers* is copied into a fresh dict (empty when None) before sending.
+    """
+    return requests.get(url, headers=dict(headers or {}), timeout=timeout)
+def post_multipart(url: str, *, api_key: str, file_field: str, file_path: Path, purpose: str = "fine-tune") -> requests.Response:
+    """Upload *file_path* to *url* as a multipart form and return the response.
+
+    The whole file is read into memory and sent under *file_field* with content
+    type "application/jsonl", alongside a `purpose` form field and a bearer
+    `Authorization` header. The request uses a fixed timeout of 300.
+    """
+    payload = file_path.read_bytes()
+    return requests.post(
+        url,
+        headers={"Authorization": f"Bearer {api_key}"},
+        files={file_field: (file_path.name, payload, "application/jsonl")},
+        data={"purpose": purpose},
+        timeout=300,
+    )
+def fmt_duration(seconds: float) -> str:
+    """Format a duration given in seconds as a short string.
+
+    Under a minute: "12.3s". Under an hour: "4m05s". Otherwise: "1h02m".
+    """
+    if seconds < 60:
+        return f"{seconds:.1f}s"
+    whole_minutes, leftover_seconds = divmod(seconds, 60)
+    if whole_minutes < 60:
+        return f"{int(whole_minutes)}m{int(leftover_seconds):02d}s"
+    whole_hours, leftover_minutes = divmod(whole_minutes, 60)
+    return f"{int(whole_hours)}h{int(leftover_minutes):02d}m"
+def _sft_record_error(record: Any) -> str | None:
+    """Return a description of the first problem in one parsed record, or None if it is valid."""
+    # A JSON line may legally decode to a list, number, string, or null; only
+    # objects can carry a 'messages' field, so report anything else explicitly.
+    if not isinstance(record, dict):
+        return f"expected a JSON object, got {type(record).__name__}"
+    messages = record.get("messages")
+    if not isinstance(messages, list) or not messages:
+        return "missing or empty 'messages' list"
+    for msg_idx, msg in enumerate(messages):
+        if not isinstance(msg, dict):
+            return f"message {msg_idx} is not an object"
+        if "role" not in msg or "content" not in msg:
+            return f"message {msg_idx} missing 'role' or 'content'"
+        if not isinstance(msg["role"], str) or not isinstance(msg["content"], str):
+            return f"message {msg_idx} has non-string role/content"
+    return None
+def validate_sft_jsonl(path: Path, *, max_errors: int = 20) -> None:
+    """Validate that *path* is a JSONL file of chat-style SFT records.
+
+    Every non-blank line must be a JSON object with a non-empty `messages`
+    list whose items are objects with string `role` and `content` fields.
+    At most one problem is reported per line, and scanning stops once
+    *max_errors* problems have been collected.
+
+    Raises:
+        TrainError: If the file does not exist or any problem was found.
+    """
+    errors: list[str] = []
+    try:
+        fh = path.open("r", encoding="utf-8")
+    except FileNotFoundError as exc:  # pragma: no cover - upstream ensures existence
+        raise TrainError(f"Dataset not found: {path}") from exc
+    with fh:
+        for idx, line in enumerate(fh, start=1):
+            stripped = line.strip()
+            if not stripped:
+                continue
+            try:
+                record = json.loads(stripped)
+            except json.JSONDecodeError as exc:
+                problem: str | None = f"invalid JSON ({exc.msg})"
+            else:
+                problem = _sft_record_error(record)
+            if problem is None:
+                continue
+            errors.append(f"Line {idx}: {problem}")
+            if len(errors) >= max_errors:
+                break
+    if errors:
+        suffix = "" if len(errors) < max_errors else f" (showing first {max_errors} issues)"
+        details = "\n - ".join(errors)
+        raise TrainError(f"Dataset validation failed{suffix}:\n - {details}")
+def limit_jsonl_examples(src: Path, limit: int) -> Path:
+    """Copy the first *limit* non-blank lines of *src* into a new temp file.
+
+    The copy is written to a fresh `sft_subset_*` temporary directory and is
+    named `<stem>.head<limit><suffix>`.
+
+    Raises:
+        TrainError: If *limit* is not positive, *src* does not exist, or no
+            non-blank line was copied.
+    """
+    if limit <= 0:
+        raise TrainError("Example limit must be positive")
+    if not src.exists():
+        raise TrainError(f"Dataset not found: {src}")
+    subset_dir = Path(tempfile.mkdtemp(prefix="sft_subset_"))
+    dest = subset_dir / f"{src.stem}.head{limit}{src.suffix}"
+    kept = 0
+    with src.open("r", encoding="utf-8") as reader, dest.open("w", encoding="utf-8") as writer:
+        for row in reader:
+            if row.strip():
+                writer.write(row)
+                kept += 1
+                if kept >= limit:
+                    break
+    if kept == 0:
+        raise TrainError("Subset dataset is empty; check limit value")
+    return dest
+def ensure_api_base(base: str) -> str:
+    """Return *base* without trailing slashes and guaranteed to end in "/api"."""
+    trimmed = base.rstrip("/")
+    return trimmed if trimmed.endswith("/api") else f"{trimmed}/api"
+def preview_json(data: Any, limit: int = 600) -> str:
+    """Return at most *limit* characters of *data* rendered for display.
+
+    The data is rendered as indented JSON; if that fails, `str(data)` is used.
+    """
+    try:
+        rendered = json.dumps(data, indent=2)
+    except Exception:
+        rendered = str(data)
+    return rendered[:limit]
+def sleep(seconds: float) -> None:
+    """Block for *seconds* by delegating to `time.sleep`."""
+    time.sleep(seconds)
+# Names exported by `from ... import *`; the public API of this module.
+__all__ = [
+    "CLIResult",
+    "REPO_ROOT",
+    "TrainError",
+    "ensure_api_base",
+    "fmt_duration",
+    "http_get",
+    "http_post",
+    "load_toml",
+    "mask_value",
+    "post_multipart",
+    "preview_json",
+    "read_env_file",
+    "run_cli",
+    "sleep",
+    "limit_jsonl_examples",
+    "validate_sft_jsonl",
+    "write_env_value",
+]

synth_ai/cli/__init__.py CHANGED Viewed

@@ -75,3 +75,26 @@ try:
     _rl_demo.register(cli)
 except Exception:
     pass
+try:
+    from synth_ai.api.train import register as _train_register
+    _train_register(cli)
+except Exception:
+    pass
+from .task_apps import task_app_group
+cli.add_command(task_app_group, name="task-app")
+try:
+    from . import task_apps as _task_apps
+    _task_apps.register(cli)
+except Exception:
+    pass
+cli.add_command(task_app_group.commands['serve'], name='serve')
+cli.add_command(task_app_group.commands['deploy'], name='deploy')
+cli.add_command(task_app_group.commands['modal-serve'], name='modal-serve')

synth_ai/cli/rl_demo.py CHANGED Viewed

@@ -67,9 +67,15 @@ def register(cli):
         _forward(["rl_demo.configure"])
     @_rlg.command("init")
-    @click.option("--force", is_flag=True, help="Overwrite existing files in CWD")
-    def rl_init(force: bool):
+    @click.option("--template", type=str, default=None, help="Template id to instantiate")
+    @click.option("--dest", type=click.Path(), default=None, help="Destination directory for files")
+    @click.option("--force", is_flag=True, help="Overwrite existing files in destination")
+    def rl_init(template: str | None, dest: str | None, force: bool):
         args = ["rl_demo.init"]
+        if template:
+            args.extend(["--template", template])
+        if dest:
+            args.extend(["--dest", dest])
         if force:
             args.append("--force")
         _forward(args)
@@ -130,9 +136,15 @@ def register(cli):
         _forward(["rl_demo.configure"])
     @cli.command("rl_demo.init")
-    @click.option("--force", is_flag=True, help="Overwrite existing files in CWD")
-    def rl_init_alias(force: bool):
+    @click.option("--template", type=str, default=None, help="Template id to instantiate")
+    @click.option("--dest", type=click.Path(), default=None, help="Destination directory for files")
+    @click.option("--force", is_flag=True, help="Overwrite existing files in destination")
+    def rl_init_alias(template: str | None, dest: str | None, force: bool):
         args = ["rl_demo.init"]
+        if template:
+            args.extend(["--template", template])
+        if dest:
+            args.extend(["--dest", dest])
         if force:
             args.append("--force")
         _forward(args)
@@ -161,12 +173,12 @@ def register(cli):
         _forward(args)
     # Top-level convenience alias: `synth-ai deploy`
-    @cli.command("deploy")
+    @cli.command("demo-deploy")
     @click.option("--local", is_flag=True, help="Run local FastAPI instead of Modal deploy")
     @click.option("--app", type=click.Path(), default=None, help="Path to Modal app.py for uv run modal deploy")
     @click.option("--name", type=str, default="synth-math-demo", help="Modal app name")
     @click.option("--script", type=click.Path(), default=None, help="Path to deploy_task_app.sh (optional legacy)")
-    def deploy_top(local: bool, app: str | None, name: str, script: str | None):
+    def deploy_demo(local: bool, app: str | None, name: str, script: str | None):
         args: list[str] = ["rl_demo.deploy"]
         if local:
             args.append("--local")

synth_ai/cli/root.py CHANGED Viewed

@@ -14,6 +14,20 @@ import sys
 import time
 import click
+try:
+    from importlib.metadata import PackageNotFoundError, version as _pkg_version
+    try:
+        __pkg_version__ = _pkg_version("synth-ai")
+    except PackageNotFoundError:
+        try:
+            from synth_ai import __version__ as __pkg_version__  # type: ignore
+        except Exception:
+            __pkg_version__ = "unknown"
+except Exception:
+    try:
+        from synth_ai import __version__ as __pkg_version__  # type: ignore
+    except Exception:
+        __pkg_version__ = "unknown"
 def find_sqld_binary() -> str | None:
@@ -66,9 +80,10 @@ rm -rf "$TMP_DIR"
     return os.path.expanduser("~/.local/bin/sqld")
-@click.group()
+@click.group(help=f"Synth AI v{__pkg_version__} - Software for aiding the best and multiplying the will.")
+@click.version_option(version=__pkg_version__, prog_name="synth-ai")
 def cli():
-    """Synth AI - Software for aiding the best and multiplying the will."""
+    """Top-level command group for Synth AI."""
 # === Legacy demo command group (aliases new rl_demo implementation) ===
@@ -84,7 +99,7 @@ def _forward_to_demo(args: list[str]) -> None:
     except Exception as e:  # pragma: no cover
         click.echo(f"Failed to import demo CLI: {e}")
         sys.exit(1)
-    rc = int(demo_cli.main(args) or 0)
+    rc = int(getattr(demo_cli, "main")(args) or 0)  # type: ignore[attr-defined]
     if rc != 0:
         sys.exit(rc)
@@ -123,6 +138,22 @@ def setup():
     _forward_to_demo(["rl_demo.setup"])
+@demo.command()
+@click.option("--template", type=str, default=None, help="Template id to instantiate")
+@click.option("--dest", type=str, default=None, help="Destination directory for files")
+@click.option("--force", is_flag=True, help="Overwrite existing files in destination")
+def init(template: str | None, dest: str | None, force: bool):
+    """Copy demo task app template into the current directory."""
+    args: list[str] = ["demo.init"]
+    if template:
+        args.extend(["--template", template])
+    if dest:
+        args.extend(["--dest", dest])
+    if force:
+        args.append("--force")
+    _forward_to_demo(args)
 @demo.command()
 @click.option("--batch-size", type=int, default=None)
 @click.option("--group-size", type=int, default=None)
@@ -142,8 +173,8 @@ def run(batch_size: int | None, group_size: int | None, model: str | None, timeo
     _forward_to_demo(args)
-@cli.command()
-def setup():
+@cli.command(name="setup")
+def setup_command():
     """Perform SDK handshake and write keys to .env."""
     _forward_to_demo(["rl_demo.setup"])
@@ -164,7 +195,7 @@ def setup():
     default=True,
     help="Kill any process already bound to --env-port without prompting",
 )
-def serve(
+def serve_deprecated(
     db_file: str,
     sqld_port: int,
     env_port: int,
@@ -174,6 +205,7 @@ def serve(
     force: bool,
 ):
     logging.basicConfig(level=logging.INFO, format="%(message)s")
+    click.echo("⚠️  'synth-ai serve' now targets task apps; use 'synth-ai serve' for task apps or 'synth-ai serve-deprecated' for this legacy service.", err=True)
     processes = []
     def signal_handler(sig, frame):

synth-ai 0.2.8.dev12__py3-none-any.whl → 0.2.9.dev0__py3-none-any.whl

synth-ai 0.2.8.dev12__py3-none-any.whl → 0.2.9.dev0__py3-none-any.whl