PyPI - synth-ai - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl - Mend

synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (48)

examples/agora_ex/README_MoE.md +224 -0
examples/agora_ex/__init__.py +7 -0
examples/agora_ex/agora_ex.py +65 -0
examples/agora_ex/agora_ex_task_app.py +590 -0
examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
examples/agora_ex/reward_fn_grpo-human.py +129 -0
examples/agora_ex/system_prompt_CURRENT.md +63 -0
examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
examples/multi_step/crafter_rl_lora.md +51 -10
examples/multi_step/sse_metrics_streaming_notes.md +357 -0
examples/multi_step/task_app_config_notes.md +7 -1
examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
examples/warming_up_to_rl/run_eval.py +127 -18
examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +232 -193
synth_ai/__init__.py +41 -1
synth_ai/api/train/builders.py +49 -19
synth_ai/api/train/configs/__init__.py +44 -0
synth_ai/api/train/configs/rl.py +133 -0
synth_ai/api/train/configs/sft.py +94 -0
synth_ai/api/train/configs/shared.py +24 -0
synth_ai/cli/demo.py +38 -39
synth_ai/cli/rl_demo.py +81 -102
synth_ai/cli/task_apps.py +3 -0
synth_ai/demos/core/cli.py +121 -159
synth_ai/environments/examples/crafter_classic/environment.py +16 -0
synth_ai/evals/__init__.py +15 -0
synth_ai/evals/client.py +85 -0
synth_ai/evals/types.py +42 -0
synth_ai/judge_schemas.py +127 -0
synth_ai/rubrics/__init__.py +22 -0
synth_ai/rubrics/validators.py +126 -0
synth_ai/tracing_v3/serialization.py +130 -0
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +1 -1
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +48 -22
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0

synth_ai/demos/core/cli.py CHANGED Viewed

@@ -1,6 +1,5 @@
 from __future__ import annotations
-import argparse
 import contextlib
 import json
 import os
@@ -45,7 +44,7 @@ def _is_modal_public_url(u: str) -> bool:
         return False
-def cmd_setup(_args: argparse.Namespace) -> int:
+def setup() -> int:
     # Change to demo directory if stored
     demo_dir = demo_core.load_demo_dir()
     if demo_dir and os.path.isdir(demo_dir):
@@ -760,7 +759,9 @@ def _ensure_task_app_ready(env: DemoEnv, synth_key: str, *, label: str) -> DemoE
     return updated_env
-def cmd_deploy(args: argparse.Namespace) -> int:
+def deploy(
+    local: bool = False, app: str | None = None, name: str | None = None, script: str | None = None
+) -> int:
     # Change to demo directory if stored
     demo_dir = demo_core.load_demo_dir()
     if demo_dir and os.path.isdir(demo_dir):
@@ -774,7 +775,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
     url = ""
     app_name = env.task_app_name or ""
     try:
-        if args.local:
+        if local:
             print("Starting local Task App…")
             import subprocess
@@ -798,7 +799,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
                 time.sleep(1)
         else:
             # Auto-detect app path if not supplied; prompt interactively from discovered ASGI apps
-            app_path = os.path.abspath(args.app) if args.app else None
+            app_path = os.path.abspath(app) if app else None
             if not app_path or not os.path.isfile(app_path):
                 # First pass: look for known common filenames
                 candidates = [
@@ -828,13 +829,13 @@ def cmd_deploy(args: argparse.Namespace) -> int:
                             choice = 1
                         choice = max(1, min(choice, len(found)))
                         app_path = str(found[choice - 1].resolve())
-            if not app_path and args.script:
+            if not app_path and script:
                 # Legacy script fallback if user supplied --script explicitly
                 from synth_ai.demos.demo_task_apps.math.deploy_modal import deploy as modal_deploy
-                url = modal_deploy(script_path=args.script, env_api_key=env.env_api_key)
-                if args.name:
-                    app_name = args.name
+                url = modal_deploy(script_path=script, env_api_key=env.env_api_key)
+                if name:
+                    app_name = name
             else:
                 if not app_path:
                     entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
@@ -845,7 +846,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
                     raise FileNotFoundError(f"App file not found: {app_path}")
                 # Surface the app path before asking for the name
                 print(f"Using task app: {app_path}")
-                existing_name = (args.name or env.task_app_name or "").strip()
+                existing_name = (name or env.task_app_name or "").strip()
                 if not existing_name:
                     existing_name = f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
                 suggested_name = existing_name
@@ -1128,7 +1129,7 @@ def _ensure_modal_installed() -> None:
         print("\n   You can deploy later after authenticating.\n")
-def cmd_init(args: argparse.Namespace) -> int:
+def init(template: str | None = None, dest: str | None = None, force: bool = False) -> int:
     """Materialise a demo task app template into the current directory."""
     templates = list(list_demo_templates())
@@ -1137,37 +1138,44 @@ def cmd_init(args: argparse.Namespace) -> int:
         return 1
     selected: DemoTemplate | None = None
-    if args.template:
-        selected = get_demo_template(args.template)
+    if template:
+        selected = get_demo_template(template)
         if selected is None:
             available = ", ".join(t.template_id for t in templates)
-            print(f"Unknown template '{args.template}'. Available: {available}")
+            print(f"Unknown template '{template}'. Available: {available}")
             return 1
     else:
-        print("Select a demo template:" + "\n")
-        for idx, template in enumerate(templates, start=1):
-            print(f"  [{idx}] {template.name} ({template.template_id})")
-            print(f"      {template.description}")
-        try:
-            choice_raw = input(f"Enter choice [1-{len(templates)}] (default 1): ").strip() or "1"
-        except Exception:
-            choice_raw = "1"
-        if not choice_raw.isdigit():
-            print("Selection must be a number.")
-            return 1
-        choice_idx = int(choice_raw)
-        if not 1 <= choice_idx <= len(templates):
-            print("Selection out of range.")
-            return 1
-        selected = templates[choice_idx - 1]
+        if force:
+            selected = templates[0]
+            print(
+                f"Using default template: {selected.name} ({selected.template_id}) "
+                f"(pass --template to choose another)"
+            )
+        else:
+            print("Select a demo template:" + "\n")
+            for idx, tpl in enumerate(templates, start=1):
+                print(f"  [{idx}] {tpl.name} ({tpl.template_id})")
+                print(f"      {tpl.description}")
+            try:
+                choice_raw = input(f"Enter choice [1-{len(templates)}] (default 1): ").strip() or "1"
+            except Exception:
+                choice_raw = "1"
+            if not choice_raw.isdigit():
+                print("Selection must be a number.")
+                return 1
+            choice_idx = int(choice_raw)
+            if not 1 <= choice_idx <= len(templates):
+                print("Selection out of range.")
+                return 1
+            selected = templates[choice_idx - 1]
     assert selected is not None
     default_subdir = selected.default_subdir or selected.template_id
     # Check if default destination is already occupied and switch to local_demos/ if needed
-    if args.dest:
-        default_dest = Path(args.dest).expanduser().resolve()
+    if dest:
+        default_dest = Path(dest).expanduser().resolve()
     else:
         primary_dest = Path.cwd() / default_subdir
         if primary_dest.exists() and any(primary_dest.iterdir()):
@@ -1176,10 +1184,13 @@ def cmd_init(args: argparse.Namespace) -> int:
         else:
             default_dest = primary_dest.resolve()
-    try:
-        dest_input = input(f"Destination directory [{default_dest}]: ").strip()
-    except Exception:
+    if force:
         dest_input = ""
+    else:
+        try:
+            dest_input = input(f"Destination directory [{default_dest}]: ").strip()
+        except Exception:
+            dest_input = ""
     destination = Path(dest_input).expanduser().resolve() if dest_input else default_dest
     # Track whether we should skip individual file prompts (if we already cleared the directory)
@@ -1190,15 +1201,18 @@ def cmd_init(args: argparse.Namespace) -> int:
             print(f"Destination {destination} is a file. Provide a directory path.")
             return 1
         if any(destination.iterdir()):
-            try:
-                response = (
-                    input(f"Destination {destination} is not empty. Overwrite? [y/N]: ")
-                    .strip()
-                    .lower()
-                )
-            except (EOFError, KeyboardInterrupt):
-                print("\nCancelled.")
-                return 1
+            if force:
+                response = "y"
+            else:
+                try:
+                    response = (
+                        input(f"Destination {destination} is not empty. Overwrite? [y/N]: ")
+                        .strip()
+                        .lower()
+                    )
+                except (EOFError, KeyboardInterrupt):
+                    print("\nCancelled.")
+                    return 1
             if response not in ("y", "yes"):
                 print("Cancelled. Choose another directory or delete the existing one.")
                 return 1
@@ -1236,15 +1250,18 @@ def cmd_init(args: argparse.Namespace) -> int:
             # Handle directory copying
             if src_path.is_dir():
                 if dest_path.exists() and not directory_cleared:
-                    try:
-                        response = (
-                            input(f"Directory {dest_path.name} exists. Overwrite? [y/N]: ")
-                            .strip()
-                            .lower()
-                        )
-                    except (EOFError, KeyboardInterrupt):
-                        print("\nCancelled.")
-                        return 1
+                    if force:
+                        response = "y"
+                    else:
+                        try:
+                            response = (
+                                input(f"Directory {dest_path.name} exists. Overwrite? [y/N]: ")
+                                .strip()
+                                .lower()
+                            )
+                        except (EOFError, KeyboardInterrupt):
+                            print("\nCancelled.")
+                            return 1
                     if response not in ("y", "yes"):
                         print(f"Skipping {dest_path.name}")
                         continue
@@ -1256,15 +1273,18 @@ def cmd_init(args: argparse.Namespace) -> int:
                 # Handle file copying
                 dest_path.parent.mkdir(parents=True, exist_ok=True)
                 if dest_path.exists() and not directory_cleared:
-                    try:
-                        response = (
-                            input(f"File {dest_path.name} exists. Overwrite? [y/N]: ")
-                            .strip()
-                            .lower()
-                        )
-                    except (EOFError, KeyboardInterrupt):
-                        print("\nCancelled.")
-                        return 1
+                    if force:
+                        response = "y"
+                    else:
+                        try:
+                            response = (
+                                input(f"File {dest_path.name} exists. Overwrite? [y/N]: ")
+                                .strip()
+                                .lower()
+                            )
+                        except (EOFError, KeyboardInterrupt):
+                            print("\nCancelled.")
+                            return 1
                     if response not in ("y", "yes"):
                         print(f"Skipping {dest_path.name}")
                         continue
@@ -1280,11 +1300,14 @@ def cmd_init(args: argparse.Namespace) -> int:
             env_path = destination / ".env"
             should_write = True
             if env_path.exists() and not directory_cleared:
-                try:
-                    response = input("File .env exists. Overwrite? [y/N]: ").strip().lower()
-                except (EOFError, KeyboardInterrupt):
-                    print("\nCancelled.")
-                    return 1
+                if force:
+                    response = "y"
+                else:
+                    try:
+                        response = input("File .env exists. Overwrite? [y/N]: ").strip().lower()
+                    except (EOFError, KeyboardInterrupt):
+                        print("\nCancelled.")
+                        return 1
                 should_write = response in ("y", "yes")
             if should_write:
                 _write_text(env_path, "\n".join(selected.env_lines) + "\n")
@@ -1296,13 +1319,16 @@ def cmd_init(args: argparse.Namespace) -> int:
             cfg_dst = (destination / selected.config_destination).resolve()
             should_copy = True
             if cfg_dst.exists() and not directory_cleared:
-                try:
-                    response = (
-                        input(f"File {cfg_dst.name} exists. Overwrite? [y/N]: ").strip().lower()
-                    )
-                except (EOFError, KeyboardInterrupt):
-                    print("\nCancelled.")
-                    return 1
+                if force:
+                    response = "y"
+                else:
+                    try:
+                        response = (
+                            input(f"File {cfg_dst.name} exists. Overwrite? [y/N]: ").strip().lower()
+                        )
+                    except (EOFError, KeyboardInterrupt):
+                        print("\nCancelled.")
+                        return 1
                 should_copy = response in ("y", "yes")
             if should_copy:
                 cfg_dst.parent.mkdir(parents=True, exist_ok=True)
@@ -1388,7 +1414,14 @@ def _write_text(path: str, content: str) -> None:
 # Note: `prepare` command has been removed; configuration now prepares TOML
-def cmd_run(args: argparse.Namespace) -> int:
+def run(
+    config: str | None = None,
+    batch_size: int | None = None,
+    group_size: int | None = None,
+    model: str | None = None,
+    timeout: int = 600,
+    dry_run: bool = False,
+) -> int:
     # Change to demo directory if stored
     demo_dir = demo_core.load_demo_dir()
     if demo_dir and os.path.isdir(demo_dir):
@@ -1429,7 +1462,7 @@ def cmd_run(args: argparse.Namespace) -> int:
     import tomllib
     try:
-        cfg_path = _select_or_create_config(getattr(args, "config", None), env)
+        cfg_path = _select_or_create_config(config, env)
     except FileNotFoundError as exc:
         print(exc)
         return 1
@@ -1451,12 +1484,12 @@ def cmd_run(args: argparse.Namespace) -> int:
         # Optional: TRAINER_START_URL passthrough if already set in environment
         run_env["TRAINER_START_URL"] = run_env.get("TRAINER_START_URL", "")
         # Forward convenience knobs
-        if args.batch_size is not None:
-            run_env["RL_BATCH_SIZE"] = str(int(args.batch_size))
-        if args.group_size is not None:
-            run_env["RL_GROUP_SIZE"] = str(int(args.group_size))
-        if args.model:
-            run_env["RL_MODEL"] = args.model
+        if batch_size is not None:
+            run_env["RL_BATCH_SIZE"] = str(int(batch_size))
+        if group_size is not None:
+            run_env["RL_GROUP_SIZE"] = str(int(group_size))
+        if model:
+            run_env["RL_MODEL"] = model
         cmd = ["uv", "run", "python", launcher]
         print(f"Launching monorepo clustered runner: {' '.join(cmd)}")
         code = _popen_stream(cmd, env=run_env)
@@ -1484,11 +1517,11 @@ def cmd_run(args: argparse.Namespace) -> int:
         inline_cfg = tomllib.load(fh)
     with open(cfg_path) as fh2:
         toml_text = fh2.read()
-    if args.batch_size is not None:
-        inline_cfg.setdefault("training", {})["batch_size"] = int(args.batch_size)
-    if args.group_size is not None:
-        inline_cfg.setdefault("training", {})["group_size"] = int(args.group_size)
-    model_name = args.model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
+    if batch_size is not None:
+        inline_cfg.setdefault("training", {})["batch_size"] = int(batch_size)
+    if group_size is not None:
+        inline_cfg.setdefault("training", {})["group_size"] = int(group_size)
+    model_name = model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
     api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
     # Print backend and key preview before request for clearer diagnostics
     try:
@@ -1678,79 +1711,8 @@ def cmd_run(args: argparse.Namespace) -> int:
                 if name == "eval.reward_mean":
                     print(f"metric eval.reward_mean step={p.get('step')} value={p.get('value')}")
                     break
-        if time.time() - start_t > (args.timeout or 600):
+        if time.time() - start_t > (timeout or 600):
             print("Timeout waiting for terminal state.")
             break
         time.sleep(2)
     return 0
-def main(argv: list[str] | None = None) -> int:
-    p = argparse.ArgumentParser(prog="synth-ai")
-    sub = p.add_subparsers(dest="cmd")
-    def _add_parser(
-        names: list[str], *, configure: Callable[[argparse.ArgumentParser], None]
-    ) -> None:
-        for name in names:
-            parser = sub.add_parser(name)
-            configure(parser)
-    _add_parser(
-        ["rl_demo.setup", "demo.setup"],
-        configure=lambda parser: parser.set_defaults(func=cmd_setup),
-    )
-    def _init_opts(parser):
-        parser.add_argument("--template", type=str, default=None, help="Template id to instantiate")
-        parser.add_argument(
-            "--dest", type=str, default=None, help="Destination directory for files"
-        )
-        parser.set_defaults(func=cmd_init)
-    _add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
-    # (prepare command removed)
-    def _deploy_opts(parser):
-        parser.add_argument(
-            "--local", action="store_true", help="Run local FastAPI instead of Modal deploy"
-        )
-        parser.add_argument(
-            "--app", type=str, default=None, help="Path to Modal app.py for uv run modal deploy"
-        )
-        parser.add_argument("--name", type=str, default=None, help="Modal app name")
-        parser.add_argument(
-            "--script", type=str, default=None, help="Path to deploy_task_app.sh (optional legacy)"
-        )
-        parser.set_defaults(func=cmd_deploy)
-    _add_parser(["rl_demo.deploy", "demo.deploy"], configure=_deploy_opts)
-    _add_parser(
-        ["rl_demo.configure", "demo.configure"],
-        configure=lambda parser: parser.set_defaults(func=cmd_run),
-    )
-    def _run_opts(parser):
-        parser.add_argument(
-            "--config", type=str, default=None, help="Path to TOML config (skip prompt)"
-        )
-        parser.add_argument("--batch-size", type=int, default=None)
-        parser.add_argument("--group-size", type=int, default=None)
-        parser.add_argument("--model", type=str, default=None)
-        parser.add_argument("--timeout", type=int, default=600)
-        parser.add_argument("--dry-run", action="store_true", help="Print request body and exit")
-        parser.set_defaults(func=cmd_run)
-    _add_parser(["run", "rl_demo.run", "demo.run"], configure=_run_opts)
-    args = p.parse_args(argv)
-    if not hasattr(args, "func"):
-        p.print_help()
-        return 1
-    return int(args.func(args) or 0)
-if __name__ == "__main__":
-    sys.exit(main())

synth_ai/environments/examples/crafter_classic/environment.py CHANGED Viewed

@@ -190,6 +190,22 @@ class SynthCrafterObservationCallable(GetObservationCallable):
         obs_dict["truncated"] = priv.truncated
         if pub.error_info:
             obs_dict["tool_error"] = pub.error_info
+        counts_payload = {}
+        try:
+            counts = getattr(priv, "achievements_current_values", {}) or {}
+            for k, v in counts.items():
+                try:
+                    counts_payload[str(k)] = int(v)
+                except Exception:
+                    try:
+                        counts_payload[str(k)] = int(float(v))
+                    except Exception:
+                        continue
+            if counts_payload:
+                obs_dict["achievements_counts"] = counts_payload
+        except Exception:
+            # Best effort; omit counts if coercion fails
+            pass
         # Derive a simple local semantic patch around the player for easy rendering
         try:

synth_ai/evals/__init__.py ADDED Viewed

@@ -0,0 +1,15 @@
+from .client import JudgeClient, JudgeOptions, JudgeScoreResponse
+from .types import Judgement, RewardJudgement, RewardMetadata, Track, TrackAggregate
+__all__ = [
+	"JudgeClient",
+	"JudgeOptions",
+	"JudgeScoreResponse",
+	"Judgement",
+	"RewardJudgement",
+	"RewardMetadata",
+	"Track",
+	"TrackAggregate",
+]

synth_ai/evals/client.py ADDED Viewed

@@ -0,0 +1,85 @@
+from __future__ import annotations
+"""Experimental Judge API client.
+This surface is experimental and subject to change without notice.
+Set environment variable `SYNTH_SILENCE_EXPERIMENTAL=1` to silence warnings.
+"""
+import os
+import warnings
+from typing import Any, Literal, TypedDict
+from synth_ai.http import AsyncHttpClient, HTTPError
+from synth_ai.tracing_v3.serialization import normalize_for_json
+Provider = Literal["groq", "gemini"]
+class JudgeOptions(TypedDict, total=False):
+	event: bool
+	outcome: bool
+	rubric_id: str
+	rubric_overrides: dict[str, Any]
+	provider: Provider
+	model: str
+	max_concurrency: int
+class JudgeScoreResponse(TypedDict, total=False):
+	status: str
+	event_rewards: list[dict[str, Any]]
+	outcome_reward: dict[str, Any]
+	details: dict[str, Any]
+class JudgeClient:
+	def __init__(self, base_url: str, api_key: str, *, timeout: float = 60.0) -> None:
+		_silence = (os.getenv("SYNTH_SILENCE_EXPERIMENTAL") or "").strip().lower()
+		if _silence not in {"1", "true", "t", "yes", "y", "on"}:
+			warnings.warn(
+				"Experimental API: synth_ai.evals.JudgeClient is experimental and may change without notice.",
+				UserWarning,
+				stacklevel=2,
+			)
+		self._base = base_url.rstrip("/")
+		self._key = api_key
+		self._timeout = timeout
+	async def score(
+		self,
+		*,
+		trace: dict[str, Any] | Any,
+		policy_name: str,
+		task_app_id: str,
+		options: JudgeOptions,
+		task_app_base_url: str | None = None,
+	) -> JudgeScoreResponse:
+		body = {
+			"policy_name": policy_name,
+			"task_app": {"id": task_app_id, **({"base_url": task_app_base_url} if task_app_base_url else {})},
+			"trace": normalize_for_json(trace),
+			"options": options or {},
+		}
+		try:
+			async with AsyncHttpClient(self._base, self._key, timeout=self._timeout) as http:
+				js = await http.post_json("/api/judge/v1/score", json=body)
+				if not isinstance(js, dict):
+					raise ValueError("invalid_judge_response_shape")
+				return js  # type: ignore[return-value]
+		except HTTPError as e:  # map to friendlier exceptions
+			status = int(getattr(e, "status", 0) or 0)
+			if status in (400, 422):
+				raise ValueError(f"judge_validation_error: {e.detail}") from e
+			if status in (401, 403):
+				raise PermissionError(f"judge_auth_error: {e.detail}") from e
+			if status == 404:
+				raise FileNotFoundError(f"judge_route_not_found: {e.detail}") from e
+			if status == 429:
+				raise Exception("judge_rate_limited") from e  # replace with RetryLater in future
+			if status >= 500:
+				raise Exception("judge_transient_error") from e  # replace with TransientError in future
+			raise

synth_ai/evals/types.py ADDED Viewed

@@ -0,0 +1,42 @@
+from __future__ import annotations
+from typing import Literal, TypedDict
+Track = Literal["process", "reasoning", "progress", "outcome"]
+class Judgement(TypedDict, total=False):
+	key: str
+	title: str
+	description: str
+	score: float
+	reason: str
+	confidence: float
+	scale: Literal["binary", "bounded", "count", "custom"]
+	source: dict
+class RewardJudgement(TypedDict, total=False):
+	judgement: Judgement
+	scope: Literal["step", "event", "outcome"]
+	turn: int | None
+	episode_id: str | None
+	reward_value: float | None
+	links: dict
+class TrackAggregate(TypedDict, total=False):
+	mean: float
+	median: float
+	std: float
+	n: int
+class RewardMetadata(TypedDict, total=False):
+	per_window: list[RewardJudgement]
+	aggregates: dict[Track, TrackAggregate]
+	overall: dict[str, float]  # {"final_outcome_score": float}
+	rubric: dict               # {"ids": {...}, "hash": "..."}
+	model_info: dict           # {"model": "...", ...}

synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl