PyPI - synth-ai - Versions diffs - 0.2.6__py3-none-any.whl → 0.2.6.dev2__py3-none-any.whl - Mend

synth-ai 0.2.6__py3-none-any.whl → 0.2.6.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (23) hide show

synth_ai/__init__.py +18 -25
synth_ai/cli/rl_demo.py +52 -4
synth_ai/demos/core/cli.py +443 -40
synth_ai/demos/demo_task_apps/math/_common.py +17 -0
synth_ai/demos/demo_task_apps/math/modal_task_app.py +415 -0
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +23 -9
synth_ai/environments/service/app.py +13 -6
synth_ai/experimental/synth_oss.py +2 -2
synth_ai/http.py +26 -102
synth_ai/http_client.py +104 -0
synth_ai/lm/core/synth_models.py +2 -2
synth_ai/tracing_v3/decorators.py +1 -0
synth_ai/tracing_v3/hooks.py +1 -0
synth_ai/tracing_v3/session_tracer.py +18 -7
synth_ai/tracing_v3/turso/manager.py +3 -1
synth_ai/tracing_v3/turso/models.py +3 -0
synth_ai/tracing_v3/utils.py +1 -0
{synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/METADATA +1 -1
{synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/RECORD +23 -20
{synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/WHEEL +0 -0
{synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/entry_points.txt +0 -0
{synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/top_level.txt +0 -0

synth_ai/demos/core/cli.py CHANGED Viewed

@@ -6,11 +6,23 @@ import os
 import sys
 import time
 from typing import Any, Dict, Callable
+import shutil
+import stat
 from synth_ai.demos.demo_task_apps import core as demo_core
 from synth_ai.demos.demo_task_apps.core import DemoEnv
+def _is_modal_public_url(u: str) -> bool:
+    """Return True when ``u`` is an http(s) URL whose host is a public ``*.modal.run`` name.
+    The check is made on the parsed hostname rather than on the raw text, so a
+    URL that only mentions ``.modal.run`` in its path or query string, or that
+    uses it as a prefix of another domain, is rejected.
+    Args:
+        u: Candidate URL or CLI output token; ``None``, empty and non-string
+            values are treated as "not a URL".
+    Returns:
+        True for ``http://`` / ``https://`` URLs hosted under ``.modal.run``
+        that contain neither ``modal.local`` nor ``pypi-mirror``; otherwise False.
+    """
+    from urllib.parse import urlsplit
+    if not isinstance(u, str):
+        return False
+    s = u.strip().lower()
+    if not s.startswith(("http://", "https://")):
+        return False
+    # Internal/mirror endpoints can show up in Modal CLI output; never treat them as the app URL.
+    if "modal.local" in s or "pypi-mirror" in s:
+        return False
+    try:
+        host = urlsplit(s).hostname or ""
+    except ValueError:
+        # urlsplit rejects malformed authorities (e.g. unbalanced IPv6 brackets).
+        return False
+    return host.endswith(".modal.run")
 def cmd_check(_args: argparse.Namespace) -> int:
     env = demo_core.load_env()
     cwd_env_path = os.path.join(os.getcwd(), ".env")
@@ -21,6 +33,15 @@ def cmd_check(_args: argparse.Namespace) -> int:
         env = demo_core.load_env()
         local_env = demo_core.load_dotenv_file(cwd_env_path)
+    def _is_modal_public_url(u: str) -> bool:
+        """Return True when ``u`` is an http(s) URL whose host is a public ``*.modal.run`` name.
+        The hostname is parsed out of the URL, so text that merely contains
+        ``.modal.run`` somewhere (path, query, or as a prefix of another
+        domain) does not qualify. ``None``, empty and non-string values
+        return False, as do URLs containing ``modal.local`` or ``pypi-mirror``.
+        """
+        from urllib.parse import urlsplit
+        if not isinstance(u, str):
+            return False
+        s = u.strip().lower()
+        if not s.startswith(("http://", "https://")):
+            return False
+        if "modal.local" in s or "pypi-mirror" in s:
+            return False
+        try:
+            host = urlsplit(s).hostname or ""
+        except ValueError:
+            # Malformed authority component; not a usable URL.
+            return False
+        return host.endswith(".modal.run")
     def _maybe_fix_task_url() -> None:
         if not env.task_app_name:
             return
@@ -28,9 +49,7 @@ def cmd_check(_args: argparse.Namespace) -> int:
         needs_lookup = False
         if not current:
             needs_lookup = True
-        elif not current.endswith(".run") or current.endswith(".moda") or current.count(".") < 2:
-            needs_lookup = True
-        elif not current.startswith("http://") and not current.startswith("https://"):
+        elif not _is_modal_public_url(current):
             needs_lookup = True
         if not needs_lookup:
             return
@@ -48,7 +67,7 @@ def cmd_check(_args: argparse.Namespace) -> int:
             return
         new_url = ""
         for token in out.split():
-            if token.startswith("http://") or token.startswith("https://"):
+            if _is_modal_public_url(token):
                 new_url = token.strip().rstrip("/")
                 break
         if new_url and new_url != current:
@@ -166,6 +185,45 @@ def _popen_stream(cmd: list[str], cwd: str | None = None, env: dict | None = Non
     return int(proc.returncode or 0)
+def _popen_stream_capture(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> tuple[int, str]:
+    """Run ``cmd``, echoing its output line by line while also collecting it.
+    stderr is merged into stdout. Each line is right-stripped before being
+    printed and stored.
+    Args:
+        cmd: Argument vector to execute.
+        cwd: Working directory for the child, or None to inherit.
+        env: Environment mapping for the child, or None to inherit.
+    Returns:
+        ``(exit_code, output)`` where ``output`` is the collected lines joined
+        with newlines. If the process cannot be started, a message is printed
+        and ``(1, "")`` is returned.
+    """
+    import subprocess
+    import threading
+    try:
+        child = subprocess.Popen(
+            cmd,
+            cwd=cwd,
+            env=env,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            text=True,
+            bufsize=1,
+        )
+    except Exception as exc:
+        print(f"Failed to launch {' '.join(cmd)}: {exc}")
+        return 1, ""
+    collected: list[str] = []
+    def _drain(stream) -> None:
+        # Reader errors are ignored; whatever was collected so far is still returned.
+        try:
+            for raw in stream:
+                text = raw.rstrip()
+                print(text)
+                collected.append(text)
+        except Exception:
+            pass
+    reader = None
+    if child.stdout is not None:
+        reader = threading.Thread(target=_drain, args=(child.stdout,), daemon=True)
+        reader.start()
+    child.wait()
+    if reader is not None:
+        # Bounded wait: the reader is a daemon thread, so it is not waited on for more than a second.
+        reader.join(timeout=1.0)
+    exit_code = int(child.returncode or 0)
+    return exit_code, "\n".join(collected)
 def cmd_deploy(args: argparse.Namespace) -> int:
     env = demo_core.load_env()
     url = ""
@@ -187,9 +245,12 @@ def cmd_deploy(args: argparse.Namespace) -> int:
             # Auto-detect app path if not supplied; prompt for name and confirmation.
             app_path = os.path.abspath(args.app) if args.app else None
             if not app_path or not os.path.isfile(app_path):
+                # Prefer the synth_demo/ app seeded by `rl_demo init` over any root-level files
                 candidates = [
+                    os.path.abspath(os.path.join(os.getcwd(), "synth_demo", "task_app.py")),
+                    os.path.abspath(os.path.join(os.getcwd(), "task_app.py")),
+                    os.path.abspath(os.path.join(os.getcwd(), "app.py")),
                     os.path.abspath(os.path.join(os.getcwd(), "math_task_app.py")),
-                    "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_task_app.py",
                 ]
                 app_path = next((p for p in candidates if os.path.isfile(p)), None)
             if not app_path and args.script:
@@ -200,12 +261,14 @@ def cmd_deploy(args: argparse.Namespace) -> int:
                     app_name = args.name
             else:
                 if not app_path:
-                    entered = input("Path to Modal app.py (e.g., tests/applications/math/rl/math_task_app.py): ").strip()
+                    entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
                     if not entered:
                         raise FileNotFoundError("No app.py path provided and auto-detect failed")
                     app_path = os.path.abspath(entered)
                 if not os.path.isfile(app_path):
                     raise FileNotFoundError(f"App file not found: {app_path}")
+                # Surface the app path before asking for the name
+                print(f"Using task app: {app_path}")
                 suggested_name = args.name or f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
                 name_in = input(f"Modal app name [{suggested_name}]: ").strip() or suggested_name
                 app_name = name_in
@@ -217,22 +280,44 @@ def cmd_deploy(args: argparse.Namespace) -> int:
                     print("Aborted by user.")
                     return 1
                 deploy_cmd = ["uv", "run", "python", "-m", "modal", "deploy", "--name", name_in, app_path]
-                code, out = _popen_capture(deploy_cmd)
-                print(out)
+                print("\nStreaming Modal build/deploy logs (this can take several minutes on first run)…\n")
+                code, deploy_logs = _popen_stream_capture(deploy_cmd)
                 if code != 0:
                     raise RuntimeError(f"modal deploy failed (exit {code})")
+                # Try to parse URL directly from streamed logs
+                if not url:
+                    try:
+                        import re as _re
+                        m_all = _re.findall(r"https?://[^\s]+\.modal\.run", deploy_logs or "")
+                        if m_all:
+                            url = m_all[-1].strip().rstrip("/")
+                    except Exception:
+                        pass
                 url_cmd = ["uv", "run", "python", "-m", "modal", "app", "url", name_in]
                 code2, out2 = _popen_capture(url_cmd)
                 if code2 == 0:
                     for token in out2.split():
-                        if token.startswith("http://") or token.startswith("https://"):
+                        if _is_modal_public_url(token):
                             url = token.strip().rstrip("/")
                             break
+                # Fallback: try reading recent Modal logs for the app to find a URL line
                 if not url:
-                    for token in (out + "\n" + out2).split():
-                        if token.startswith("http://") or token.startswith("https://"):
-                            url = token.strip().rstrip("/")
-                            break
+                    code3, out3 = _popen_capture(["uv", "run", "python", "-m", "modal", "app", "list"])
+                    if code3 == 0 and out3:
+                        for line in out3.splitlines():
+                            if name_in in line:
+                                for token in line.split():
+                                    if _is_modal_public_url(token):
+                                        url = token.strip().rstrip("/")
+                                        break
+                            if url:
+                                break
+                # Prompt user if still no valid URL
+                if not url:
+                    print("\nCould not auto-detect a public Modal URL for the app.")
+                    entered = input("Enter the Modal public URL (must contain '.modal.run'), or press Enter to abort: ").strip()
+                    if entered and _is_modal_public_url(entered):
+                        url = entered.rstrip("/")
                 if not url:
                     raise RuntimeError("Failed to resolve public URL from modal CLI output")
         if not url:
@@ -286,9 +371,34 @@ def cmd_configure(args: argparse.Namespace) -> int:
     demo_core.persist_dotenv_values({"ENVIRONMENT_API_KEY": env_key})
     task_url = env.task_app_base_url
-    if not task_url:
-        print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
-        return 1
+    if not task_url or not _is_modal_public_url(task_url):
+        # If we have an app name, offer to resolve from Modal first
+        resolved = ""
+        if env.task_app_name:
+            try:
+                choice = input(f"Resolve URL from Modal for app '{env.task_app_name}'? [Y/n]: ").strip().lower() or "y"
+                if choice.startswith("y"):
+                    code, out = _popen_capture([
+                        "uv", "run", "python", "-m", "modal", "app", "url", env.task_app_name
+                    ])
+                    if code == 0 and out:
+                        for tok in out.split():
+                            if _is_modal_public_url(tok):
+                                resolved = tok.strip().rstrip("/")
+                                break
+            except Exception:
+                resolved = ""
+        if not resolved:
+            print("Task app URL not configured or not a valid Modal public URL.")
+            print("Examples: https://<app-name>-fastapi-app.modal.run")
+            entered = input("Enter Task App base URL (must contain '.modal.run'), or press Enter to abort: ").strip()
+            if not entered or not _is_modal_public_url(entered):
+                print("Valid Task App URL is required. Run: uvx synth-ai rl_demo deploy")
+                return 1
+            task_url = entered.rstrip("/")
+        else:
+            task_url = resolved
+        demo_core.persist_task_url(task_url, name=(env.task_app_name or None))
     app_name = env.task_app_name.strip()
     if not app_name:
@@ -317,20 +427,56 @@ def cmd_configure(args: argparse.Namespace) -> int:
         secret_args.append(f"SYNTH_API_KEY={synth_for_secret}")
     create_cmd = ["uv", "run", "modal", "secret", "create", secret_name, *secret_args]
-    code, out = _popen_capture(create_cmd)
+    def _mask_args(args: list[str]) -> list[str]:
+        """Return a copy of ``args`` that is safe to echo to the terminal.
+        ``KEY=value`` items whose key is ENVIRONMENT_API_KEY, OPENAI_API_KEY or
+        SYNTH_API_KEY have the value replaced by ``***`` plus its last five
+        characters. The suffix is shown only when at least five other
+        characters stay hidden, so a short value is never printed in full.
+        All other items are returned unchanged.
+        """
+        secret_keys = ("ENVIRONMENT_API_KEY", "OPENAI_API_KEY", "SYNTH_API_KEY")
+        masked: list[str] = []
+        for a in args:
+            k, sep, v = a.partition("=")
+            if sep and k in secret_keys:
+                suf = v[-5:] if len(v) >= 10 else ""
+                masked.append(f"{k}=***{suf}")
+            else:
+                masked.append(a)
+        return masked
+    print("\n[configure] Creating Modal secret (streaming logs)…")
+    print("[configure] Command:", " ".join(_mask_args(create_cmd)))
+    code = _popen_stream(create_cmd)
     if code != 0:
-        print(out)
-        print("Secret create failed; retrying with delete → create…")
-        _popen_capture(["uv", "run", "modal", "secret", "delete", secret_name])
-        code, out = _popen_capture(create_cmd)
+        print("[configure] Secret create failed; attempting delete → create")
+        delete_cmd = ["bash", "-lc", f"printf 'y\\n' | uv run modal secret delete {secret_name}"]
+        print("[configure] Command:", " ".join(delete_cmd))
+        _popen_stream(delete_cmd)
+        print("[configure] Retrying secret create…")
+        print("[configure] Command:", " ".join(_mask_args(create_cmd)))
+        code = _popen_stream(create_cmd)
         if code != 0:
-            print(out)
-            print("Failed to provision Modal secret.")
+            print("[configure] Failed to provision Modal secret.")
             return 2
     # Verify task app can read the secret by hitting rollout health with X-API-Key.
     rollout_url = task_url.rstrip("/") + "/health/rollout"
-    rc, body = _http("GET", rollout_url, headers={"X-API-Key": env_key})
+    print("[configure] Verifying rollout health:")
+    # Prefer rollout-specific health first (auth-aware), then plain /health
+    health_base = task_url.rstrip("/")
+    health_urls = [f"{health_base}/health/rollout", f"{health_base}/health"]
+    rc = 0
+    body = ""
+    for h in health_urls:
+        print("[configure] GET", h)
+        rc, body = _http("GET", h, headers={"X-API-Key": env_key})
+        if rc == 200:
+            rollout_url = h
+            break
+    print("[configure] status:", rc)
+    try:
+        import json as _json
+        preview = _json.dumps(body)[:800] if isinstance(body, dict) else str(body)[:800]
+    except Exception:
+        preview = str(body)[:800]
+    print("[configure] body:", preview)
     if rc != 200:
         print(f"Warning: rollout health check failed ({rc}). Response: {body}")
     else:
@@ -341,22 +487,26 @@ def cmd_configure(args: argparse.Namespace) -> int:
     env.task_app_name = app_name
     env.task_app_secret_name = secret_name
-    # Prepare a baseline TOML (formerly `prepare`): prompt and write demo_config.toml
-    defaults = [
-        os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
-    ]
-    mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
-    if os.path.isfile(mono):
-        defaults.append(mono)
-    print("Select a baseline TOML:")
-    for i, p in enumerate(defaults, 1):
-        print(f"  [{i}] {p}")
-    choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
-    try:
-        idx = max(1, min(int(choice), len(defaults))) - 1
-    except Exception:
-        idx = 0
-    base_path = defaults[idx]
+    # Prefer the seeded CWD config if present; otherwise fall back to packaged default
+    seeded_cfg = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
+    if os.path.isfile(seeded_cfg):
+        base_path = seeded_cfg
+    else:
+        defaults = [
+            os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
+        ]
+        mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
+        if os.path.isfile(mono):
+            defaults.append(mono)
+        print("Select a baseline TOML:")
+        for i, p in enumerate(defaults, 1):
+            print(f"  [{i}] {p}")
+        choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
+        try:
+            idx = max(1, min(int(choice), len(defaults))) - 1
+        except Exception:
+            idx = 0
+        base_path = defaults[idx]
     with open(base_path, "r") as fh:
         text = fh.read()
     import re
@@ -413,6 +563,131 @@ def cmd_configure(args: argparse.Namespace) -> int:
     return 0
+def cmd_init(args: argparse.Namespace) -> int:
+    """Scaffold a Modal-ready Math Task App under ``./synth_demo``.
+    Checks that ``modal`` is importable (attempting an install when it is not),
+    copies the packaged ``modal_task_app.py`` to ``synth_demo/task_app.py``,
+    writes ``synth_demo/deploy_task_app.sh``, creates ``synth_demo/.env`` with
+    placeholder keys if it does not exist, and seeds
+    ``synth_demo/demo_config.toml`` from the packaged ``config.toml``.
+    Args:
+        args: Parsed CLI namespace. ``args.force`` (optional) allows an
+            existing ``task_app.py`` and ``demo_config.toml`` to be overwritten.
+    Returns:
+        0 on success; 1 when the packaged task app is missing or an existing
+        ``task_app.py`` would be overwritten without ``--force``; 2 on any
+        unexpected error.
+    """
+    try:
+        # Ensure `modal` is installed for deployment flows
+        def _has_modal() -> bool:
+            try:
+                import importlib.util as _iu
+                return _iu.find_spec("modal") is not None
+            except Exception:
+                return False
+        if not _has_modal():
+            print("modal not found; installing…")
+            # Prefer uv if available; otherwise fallback to pip
+            try:
+                if shutil.which("uv"):
+                    code, out = _popen_capture(["uv", "pip", "install", "modal>=1.1.4"])
+                else:
+                    code, out = _popen_capture([sys.executable, "-m", "pip", "install", "modal>=1.1.4"])
+                if code != 0:
+                    print(out)
+                    print("Failed to install modal; continuing may fail.")
+                else:
+                    print("modal installed successfully.")
+            except Exception as e:
+                print(f"modal install error: {e}")
+            # Re-check
+            if not _has_modal():
+                print("Warning: modal is still not importable after install attempt.")
+        else:
+            print("modal found")
+        here = os.getcwd()
+        demo_dir = os.path.join(here, "synth_demo")
+        os.makedirs(demo_dir, exist_ok=True)
+        # Paths inside synth_demo/
+        dst_task_py = os.path.join(demo_dir, "task_app.py")
+        dst_deploy = os.path.join(demo_dir, "deploy_task_app.sh")
+        env_path = os.path.join(demo_dir, ".env")
+        dst_cfg = os.path.join(demo_dir, "demo_config.toml")
+        # Copy packaged math modal task app into synth_demo/task_app.py
+        src_modal = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "modal_task_app.py"))
+        if not os.path.isfile(src_modal):
+            print("Init failed: packaged math modal task app not found.")
+            print(f"Looked for: {src_modal}")
+            return 1
+        if os.path.exists(dst_task_py) and not getattr(args, "force", False):
+            print(f"Refusing to overwrite existing file: {dst_task_py} (use --force)")
+            return 1
+        shutil.copy2(src_modal, dst_task_py)
+        # Create deploy script in synth_demo/.
+        # Raw string: the script contains the regex `\.modal\.run`, and `\.` is not a
+        # valid escape in a normal string literal.
+        deploy_text = r"""#!/usr/bin/env bash
+set -euo pipefail
+HERE=$(cd "$(dirname "$0")" && pwd)
+APP="$HERE/task_app.py"
+if [ -f "$HERE/.env" ]; then
+  # shellcheck disable=SC2046
+  export $(grep -v '^#' "$HERE/.env" | xargs -I{} echo {})
+fi
+uv run modal deploy "$APP" | tee "$HERE/.last_deploy.log"
+URL=$(grep -Eo 'https://[^ ]+\.modal\.run' "$HERE/.last_deploy.log" | tail -1 || true)
+if [ -n "$URL" ]; then
+  if grep -q '^TASK_APP_BASE_URL=' "$HERE/.env" 2>/dev/null; then
+    sed -i.bak "s#^TASK_APP_BASE_URL=.*#TASK_APP_BASE_URL=$URL#" "$HERE/.env" || true
+  else
+    echo "TASK_APP_BASE_URL=$URL" >> "$HERE/.env"
+  fi
+  echo "Saved TASK_APP_BASE_URL to $HERE/.env"
+fi
+"""
+        _write_text(dst_deploy, deploy_text)
+        # Marking the script executable is best-effort, but tell the user when it fails.
+        try:
+            st = os.stat(dst_deploy)
+            os.chmod(dst_deploy, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
+        except OSError as e:
+            print(f"Warning: could not mark {dst_deploy} as executable: {e}")
+        # Seed .env if not present (an existing .env is never overwritten, even with --force)
+        if not os.path.exists(env_path):
+            _write_text(env_path, "\n".join([
+                "# Required for task app auth to environment service",
+                "ENVIRONMENT_API_KEY=",
+                "",
+                "# Optional: for CLI job submission and proxying OpenAI models",
+                "SYNTH_API_KEY=",
+                "OPENAI_API_KEY=",
+                "",
+                "# Optional: set to 'prod' to use production names",
+                "ENVIRONMENT=",
+            ]) + "\n")
+        # Seed demo_config.toml from packaged default if not present (or overwrite with --force)
+        packaged_cfg = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml"))
+        try:
+            if os.path.isfile(packaged_cfg):
+                if not os.path.exists(dst_cfg) or getattr(args, "force", False):
+                    shutil.copy2(packaged_cfg, dst_cfg)
+        except OSError as e:
+            print(f"Warning: could not seed {dst_cfg}: {e}")
+        print("Initialized Math Task App in synth_demo/:")
+        print(f"  - {dst_task_py}")
+        print(f"  - {dst_deploy}")
+        print(f"  - {env_path} (created if missing)")
+        if os.path.exists(dst_cfg):
+            print(f"  - {dst_cfg} (seeded)")
+        print("")
+        print("Next steps:")
+        print("  1) cd synth_demo && put your ENVIRONMENT_API_KEY in ./.env")
+        print("  2) Deploy to Modal:")
+        # The deploy script is a plain bash script; `uvx` runs Python tools and cannot launch it.
+        print("     bash ./deploy_task_app.sh")
+        print("  3) From project root, run: uvx synth-ai rl_demo configure; uvx synth-ai rl_demo run")
+        return 0
+    except Exception as e:
+        print(f"Init error: {e}")
+        return 2
 def _http(method: str, url: str, headers: Dict[str, str] | None = None, body: Dict[str, Any] | None = None) -> tuple[int, Dict[str, Any] | str]:
     import urllib.request, urllib.error, json as _json
     data = None
@@ -493,6 +768,19 @@ def cmd_run(args: argparse.Namespace) -> int:
         code = _popen_stream(cmd, env=run_env)
         if code != 0:
             print(f"Clustered runner exited with code {code}")
+            # Actionable guidance for common auth issues
+            try:
+                base_url = backend_base.rstrip("/") + "/api"
+            except Exception:
+                base_url = backend_base
+            sk = (env.synth_api_key or "").strip()
+            ek = (env.env_api_key or "").strip()
+            print("Hint: If backend responded 401, verify SYNTH_API_KEY for:", base_url)
+            if sk:
+                print(f"  SYNTH_API_KEY len={len(sk)} last5={sk[-5:]}")
+            if ek:
+                print(f"  ENVIRONMENT_API_KEY len={len(ek)} last5={ek[-5:]}")
+            print("Also ensure your Modal secret contains ENVIRONMENT_API_KEY and matches the task app.")
         return code
     # Fallback: legacy jobs API flow
@@ -591,6 +879,17 @@ def cmd_run(args: argparse.Namespace) -> int:
         except Exception:
             print(str(js))
         print("Request body was:\n" + json.dumps(body, indent=2))
+        # Extra hints for auth failures
+        try:
+            sk = (env.synth_api_key or "").strip()
+            if int(code) == 401 or (isinstance(js, dict) and any(isinstance(v, str) and "Invalid API key" in v for v in js.values())):
+                base_url = env.dev_backend_url
+                print("Hint: HTTP 401 Unauthorized from backend. Verify SYNTH_API_KEY for:", base_url)
+                if sk:
+                    print(f"  SYNTH_API_KEY len={len(sk)} last5={sk[-5:]}")
+                print("Also ensure your Modal secret contains a valid ENVIRONMENT_API_KEY.")
+        except Exception:
+            pass
         return 2
     job_id = js.get("job_id") or js.get("id") or ""
     if not job_id:
@@ -639,6 +938,96 @@ def cmd_run(args: argparse.Namespace) -> int:
     return 0
+def cmd_eval(args: argparse.Namespace) -> int:
+    """Send one evaluation rollout to the Task App and print its mean return.
+    The config is taken from ``--config`` when given, else ``demo_config.toml``
+    in the current directory, else the packaged math ``config.toml``. The user
+    is prompted for the model name and a confirmation, then a rollout request
+    is POSTed to ``<task_app_base_url>/rollout``.
+    Args:
+        args: Parsed CLI namespace; reads ``args.config`` (optional) and
+            ``args.model``.
+    Returns:
+        0 on success; 1 when the task app URL or config is missing or
+        unreadable, or the user aborts; 2 when the rollout request fails.
+    """
+    # NOTE(review): the parser also defines --timeout, but nothing here reads it.
+    env = demo_core.load_env()
+    # Ensure required env
+    if not env.task_app_base_url:
+        print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
+        return 1
+    # Load config: explicit --config wins; else CWD demo_config.toml; else packaged default
+    cfg_path: str | None = None
+    if getattr(args, "config", None):
+        p = os.path.abspath(args.config)
+        if not os.path.isfile(p):
+            print(f"Config not found: {p}")
+            return 1
+        cfg_path = p
+    else:
+        cwd_prepared = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
+        if os.path.isfile(cwd_prepared):
+            cfg_path = cwd_prepared
+        else:
+            packaged = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml"))
+            cfg_path = packaged if os.path.isfile(packaged) else None
+    if not cfg_path:
+        print("No config TOML found.")
+        return 1
+    import tomllib
+    # Report an unreadable or malformed config like the other failure paths instead of a traceback.
+    try:
+        with open(cfg_path, "rb") as fh:
+            inline_cfg = tomllib.load(fh)
+    except (OSError, tomllib.TOMLDecodeError) as exc:
+        print(f"Failed to read config {cfg_path}: {exc}")
+        return 1
+    # Treat a missing or non-table [model] / [rollout] entry as empty.
+    model_cfg = inline_cfg.get("model")
+    if not isinstance(model_cfg, dict):
+        model_cfg = {}
+    rollout_cfg = inline_cfg.get("rollout")
+    if not isinstance(rollout_cfg, dict):
+        rollout_cfg = {}
+    # Model selection prompt
+    default_model = args.model or model_cfg.get("name") or "Qwen/Qwen3-0.6B"
+    entered = input(f"Model to evaluate [{default_model}]: ").strip()
+    model = entered or default_model
+    confirm = (input(f"Use model '{model}'? [Y/n]: ").strip().lower() or "y").startswith("y")
+    if not confirm:
+        print("Aborted by user.")
+        return 1
+    # Inference goes through the backend proxy; make sure the backend URL ends in /api first.
+    backend_api = (env.dev_backend_url or "https://agent-learning.onrender.com/api").rstrip("/")
+    if not backend_api.endswith("/api"):
+        backend_api = f"{backend_api}/api"
+    inference_url = f"{backend_api}/proxy"
+    # ops: alternate agent/env, one pair per two configured steps (at least one pair)
+    try:
+        steps = int(rollout_cfg.get("max_steps_per_episode", 4))
+    except (TypeError, ValueError):
+        steps = 4
+    ops: list[str] = ["agent", "env"] * max(1, steps // 2)
+    env_name = rollout_cfg.get("env_name") or "math"
+    policy_name = rollout_cfg.get("policy_name") or "math-react"
+    run_id = f"eval-{int(time.time())}"
+    body: Dict[str, Any] = {
+        "run_id": run_id,
+        "env": {
+            "env_name": env_name,
+            "config": rollout_cfg,
+        },
+        "policy": {
+            "policy_name": policy_name,
+            "config": {"model": model, "inference_url": inference_url},
+        },
+        "ops": ops,
+        "on_done": "terminate",
+    }
+    # POST to task app rollout endpoint
+    headers = {"Content-Type": "application/json"}
+    if env.env_api_key:
+        headers["X-API-Key"] = env.env_api_key
+    rc, resp = _http("POST", env.task_app_base_url.rstrip("/") + "/rollout", headers=headers, body=body)
+    if rc not in (200, 201) or not isinstance(resp, dict):
+        print("Eval rollout failed:", rc)
+        try:
+            print(json.dumps(resp, indent=2) if isinstance(resp, dict) else str(resp))
+        except Exception:
+            print(str(resp))
+        print("Request body was:\n" + json.dumps(body, indent=2))
+        return 2
+    metrics = resp.get("metrics")
+    mean = metrics.get("mean_return") if isinstance(metrics, dict) else None
+    if mean is not None:
+        print(f"eval.reward_mean={mean}")
+    else:
+        # No usable metric: show the whole response.
+        print(json.dumps(resp, indent=2))
+    return 0
 def main(argv: list[str] | None = None) -> int:
     p = argparse.ArgumentParser(prog="synth-ai")
     sub = p.add_subparsers(dest="cmd")
@@ -650,6 +1039,12 @@ def main(argv: list[str] | None = None) -> int:
     _add_parser(["rl_demo.check", "demo.check"], configure=lambda parser: parser.set_defaults(func=cmd_check))
+    def _init_opts(parser):
+        """Register the ``init`` sub-command options and bind it to ``cmd_init``."""
+        # cmd_init consults --force only for synth_demo/task_app.py and synth_demo/demo_config.toml.
+        parser.add_argument("--force", action="store_true", help="Overwrite existing synth_demo/task_app.py and synth_demo/demo_config.toml")
+        parser.set_defaults(func=cmd_init)
+    _add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
     # (prepare command removed)
     def _deploy_opts(parser):
@@ -674,6 +1069,14 @@ def main(argv: list[str] | None = None) -> int:
     _add_parser(["rl_demo.run", "demo.run"], configure=_run_opts)
+    def _eval_opts(parser):
+        """Register the ``eval`` sub-command options and bind it to ``cmd_eval``."""
+        option_table = (
+            ("--config", str, None, "Path to TOML config (optional)"),
+            ("--model", str, None, "Model to evaluate (default Qwen/Qwen3-0.6B)"),
+            ("--timeout", int, 300, "Seconds to wait for metrics"),
+        )
+        for flag, kind, fallback, description in option_table:
+            parser.add_argument(flag, type=kind, default=fallback, help=description)
+        parser.set_defaults(func=cmd_eval)
+    _add_parser(["rl_demo.eval", "demo.eval"], configure=_eval_opts)
     args = p.parse_args(argv)
     if not hasattr(args, "func"):
         p.print_help()

synth_ai/demos/demo_task_apps/math/_common.py ADDED Viewed

@@ -0,0 +1,17 @@
+"""Minimal helpers for the math task app.
+This module provides a local fallback for install_problem_bank_into_shared so
+the modal task app can import it without requiring an external math_rl package.
+"""
+# The docstring must be the first statement to become the module's __doc__;
+# a future import is still allowed directly after it.
+from __future__ import annotations
+def install_problem_bank_into_shared() -> None:
+    """No-op placeholder for installing the Hendrycks MATH problem bank.
+    In production deployments, this can download or unpack the problem bank
+    into a shared directory. For the demo scaffold, it is a no-op.
+    Returns:
+        None, always.
+    """
+    return None

synth-ai 0.2.6__py3-none-any.whl → 0.2.6.dev2__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.6__py3-none-any.whl → 0.2.6.dev2__py3-none-any.whl