PyPI - synth-ai - Versions diffs - 0.2.10__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl - Mend

synth-ai 0.2.10__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (73) hide show

examples/agora_ex/README_MoE.md +224 -0
examples/agora_ex/__init__.py +7 -0
examples/agora_ex/agora_ex.py +65 -0
examples/agora_ex/agora_ex_task_app.py +590 -0
examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
examples/agora_ex/reward_fn_grpo-human.py +129 -0
examples/agora_ex/system_prompt_CURRENT.md +63 -0
examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
examples/multi_step/crafter_rl_lora.md +51 -10
examples/multi_step/sse_metrics_streaming_notes.md +357 -0
examples/multi_step/task_app_config_notes.md +494 -0
examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
examples/warming_up_to_rl/run_eval.py +267 -41
examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +376 -193
synth_ai/__init__.py +41 -1
synth_ai/api/train/builders.py +74 -33
synth_ai/api/train/cli.py +29 -6
synth_ai/api/train/configs/__init__.py +44 -0
synth_ai/api/train/configs/rl.py +133 -0
synth_ai/api/train/configs/sft.py +94 -0
synth_ai/api/train/configs/shared.py +24 -0
synth_ai/api/train/env_resolver.py +18 -19
synth_ai/api/train/supported_algos.py +8 -5
synth_ai/api/train/utils.py +6 -1
synth_ai/cli/__init__.py +4 -2
synth_ai/cli/_storage.py +19 -0
synth_ai/cli/balance.py +14 -2
synth_ai/cli/calc.py +37 -22
synth_ai/cli/demo.py +38 -39
synth_ai/cli/legacy_root_backup.py +12 -14
synth_ai/cli/recent.py +12 -7
synth_ai/cli/rl_demo.py +81 -102
synth_ai/cli/status.py +4 -3
synth_ai/cli/task_apps.py +146 -137
synth_ai/cli/traces.py +4 -3
synth_ai/cli/watch.py +3 -2
synth_ai/demos/core/cli.py +121 -159
synth_ai/environments/examples/crafter_classic/environment.py +16 -0
synth_ai/evals/__init__.py +15 -0
synth_ai/evals/client.py +85 -0
synth_ai/evals/types.py +42 -0
synth_ai/jobs/client.py +15 -3
synth_ai/judge_schemas.py +127 -0
synth_ai/rubrics/__init__.py +22 -0
synth_ai/rubrics/validators.py +126 -0
synth_ai/task/server.py +14 -7
synth_ai/tracing_v3/decorators.py +51 -26
synth_ai/tracing_v3/examples/basic_usage.py +12 -7
synth_ai/tracing_v3/llm_call_record_helpers.py +107 -53
synth_ai/tracing_v3/replica_sync.py +8 -4
synth_ai/tracing_v3/serialization.py +130 -0
synth_ai/tracing_v3/storage/utils.py +11 -9
synth_ai/tracing_v3/turso/__init__.py +12 -0
synth_ai/tracing_v3/turso/daemon.py +2 -1
synth_ai/tracing_v3/turso/native_manager.py +28 -15
{synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +4 -2
{synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +73 -40
{synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
{synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
{synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0

examples/warming_up_to_rl/run_eval.py CHANGED Viewed

@@ -14,11 +14,14 @@ import contextlib
 import json
 import os
 import re
-import tomllib
+import sys
 from collections import Counter
+from copy import deepcopy
 from pathlib import Path
 from typing import Any
+import tomllib
 import httpx
@@ -115,26 +118,34 @@ class TaskAppClient:
         run_id: str,
         env_name: str,
         seed: int,
-        difficulty: str,
+        difficulty: str | None,
         policy_name: str,
         policy_config: dict[str, Any],
         max_turns: int,
+        env_config: dict[str, Any] | None = None,
+        ops: list[str] | None = None,
     ) -> dict[str, Any]:
-        ops: list[str] = []
-        for _ in range(max_turns):
-            ops.extend(["agent", "env"])
+        ops_seq: list[str] = list(ops) if ops is not None else []
+        if not ops_seq:
+            for _ in range(max_turns):
+                ops_seq.extend(["agent", "env"])
+        env_cfg: dict[str, Any] = {}
+        if isinstance(env_config, dict):
+            env_cfg.update(env_config)
+        if difficulty is not None and "difficulty" not in env_cfg:
+            env_cfg["difficulty"] = difficulty
         payload: dict[str, Any] = {
             "run_id": run_id,
             "env": {
                 "env_name": env_name,
-                "config": {"difficulty": difficulty},
+                "config": env_cfg,
                 "seed": seed,
             },
             "policy": {
                 "policy_name": policy_name,
                 "config": policy_config,
             },
-            "ops": ops,
+            "ops": ops_seq,
             "on_done": "terminate",
         }
         # Ensure X-API-Key is included
@@ -323,6 +334,12 @@ async def eval_episode(client: TaskAppClient, seed: int) -> dict[str, Any]:
     observation = created.get("observation") if isinstance(created, dict) else None
     if not isinstance(observation, dict):
         observation = {}
+    try:
+        ach_map_initial = observation.get("achievements_status")
+        if isinstance(ach_map_initial, dict):
+            achievements.update(k for k, v in ach_map_initial.items() if v)
+    except Exception:
+        pass
     try:
         while turns < MAX_TURNS and not done:
@@ -342,6 +359,12 @@ async def eval_episode(client: TaskAppClient, seed: int) -> dict[str, Any]:
                 nxt = step.get("observation")
                 if isinstance(nxt, dict):
                     observation = nxt
+                    try:
+                        ach_map = observation.get("achievements_status")
+                        if isinstance(ach_map, dict):
+                            achievements.update(k for k, v in ach_map.items() if v)
+                    except Exception:
+                        pass
     finally:
         with contextlib.suppress(Exception):
             await client.terminate(env_name, env_id)
@@ -349,21 +372,45 @@ async def eval_episode(client: TaskAppClient, seed: int) -> dict[str, Any]:
     return {"seed": seed, "turns": turns, "achievements": sorted(achievements)}
-async def main() -> None:
-    # Best-effort load local .env if present (ensures ENVIRONMENT_API_KEY for rollout)
+def _load_dotenv_defaults() -> None:
+    """Load .env-style key/value pairs without clobbering explicit exports."""
     try:
-        env_path = Path(__file__).resolve().parent / ".env"
-        if env_path.exists():
-            for line in env_path.read_text(encoding="utf-8").splitlines():
-                line = line.strip()
-                if not line or line.startswith("#") or "=" not in line:
-                    continue
-                k, v = line.split("=", 1)
-                k = k.strip()
-                v = v.strip().strip('"').strip("'")
-                os.environ.setdefault(k, v)
+        script_path = Path(__file__).resolve()
     except Exception:
-        pass
+        return
+    candidates: list[Path] = []
+    # Prefer the repo root .env, then allow per-directory overrides.
+    for base in [Path.cwd(), script_path.parent, *script_path.parents]:
+        env_path = base / ".env"
+        if env_path not in candidates and env_path.is_file():
+            candidates.append(env_path)
+    seen: set[str] = set()
+    try:
+        for env_path in candidates:
+            try:
+                for raw in env_path.read_text(encoding="utf-8").splitlines():
+                    line = raw.strip()
+                    if not line or line.startswith("#") or "=" not in line:
+                        continue
+                    key, value = line.split("=", 1)
+                    key = key.strip()
+                    if not key or key in seen:
+                        continue
+                    seen.add(key)
+                    val = value.strip().strip('"').strip("'")
+                    os.environ.setdefault(key, val)
+            except Exception:
+                continue
+    except Exception:
+        return
+async def main() -> None:
+    _load_dotenv_defaults()
+    if not (os.getenv("ENVIRONMENT_API_KEY") or os.getenv("DEV_ENVIRONMENT_API_KEY")):
+        raise RuntimeError(
+            "ENVIRONMENT_API_KEY is required. Export it or add it to your project .env."
+        )
     parser = argparse.ArgumentParser(
         description="Baseline eval against task app with optional TOML config"
@@ -415,11 +462,20 @@ async def main() -> None:
                 async with sem:
                     try:
                         run_id = f"eval-{seed}"
-                        # Build policy config from TOML (explicit control; no server-side guessing)
-                        policy_cfg: dict[str, Any] = {
-                            "model": cfg.get("model", MODEL),
-                            "inference_url": inf_url,
-                        }
+                        rollout_cfg_raw = cfg.get("rollout") or {}
+                        rollout_cfg = (
+                            dict(rollout_cfg_raw) if isinstance(rollout_cfg_raw, dict) else {}
+                        )
+                        env_config_raw = rollout_cfg.get("env_config") or {}
+                        env_config = (
+                            deepcopy(env_config_raw) if isinstance(env_config_raw, dict) else {}
+                        )
+                        policy_cfg_raw = rollout_cfg.get("policy_config") or {}
+                        policy_cfg = (
+                            deepcopy(policy_cfg_raw) if isinstance(policy_cfg_raw, dict) else {}
+                        )
+                        policy_cfg.setdefault("model", cfg.get("model", MODEL))
+                        policy_cfg.setdefault("inference_url", inf_url)
                         for k in (
                             "max_tokens",
                             "temperature",
@@ -428,20 +484,58 @@ async def main() -> None:
                             "thinking_budget",
                             "use_tools",
                         ):
-                            if k in cfg and cfg.get(k) is not None:
+                            if k in cfg and cfg.get(k) is not None and k not in policy_cfg:
                                 policy_cfg[k] = cfg.get(k)
+                        env_name = str(rollout_cfg.get("env_name") or "crafter")
+                        policy_name = str(
+                            rollout_cfg.get("policy_name") or cfg.get("policy_name") or "crafter"
+                        )
+                        max_turns_local = MAX_TURNS
+                        for candidate in (rollout_cfg.get("max_turns"), cfg.get("max_turns")):
+                            if candidate is None:
+                                continue
+                            with contextlib.suppress(Exception):
+                                max_turns_local = int(candidate)
+                                break
+                        difficulty_override: str | None = None
+                        if isinstance(env_config, dict):
+                            diff_cfg = env_config.get("difficulty")
+                            if isinstance(diff_cfg, str) and diff_cfg:
+                                difficulty_override = diff_cfg
+                        if difficulty_override is None:
+                            cfg_diff = rollout_cfg.get("difficulty") or cfg.get("difficulty")
+                            if isinstance(cfg_diff, str) and cfg_diff:
+                                difficulty_override = cfg_diff
+                        if difficulty_override is None:
+                            difficulty_override = os.getenv("DIFFICULTY", "easy")
                         r = await client.rollout(
                             run_id=run_id,
-                            env_name="crafter",
+                            env_name=env_name,
                             seed=seed,
-                            difficulty=os.getenv("DIFFICULTY", "easy"),
-                            policy_name=cfg.get("policy_name", "crafter"),
+                            difficulty=difficulty_override,
+                            policy_name=policy_name,
                             policy_config=policy_cfg,
-                            max_turns=MAX_TURNS,
+                            max_turns=max_turns_local,
+                            env_config=env_config,
                         )
+                        metrics_block = r.get("metrics") or {}
+                        mean_return = None
+                        if isinstance(metrics_block, dict):
+                            with contextlib.suppress(Exception):
+                                mean_return = float(metrics_block.get("mean_return"))
+                        stepwise_details: dict[str, Any] = {}
+                        if isinstance(metrics_block, dict):
+                            details_block = metrics_block.get("details") or {}
+                            if isinstance(details_block, dict):
+                                step_block = details_block.get("stepwise") or {}
+                                if isinstance(step_block, dict):
+                                    stepwise_details = step_block
                         # Extract achievements count if present
-                        ach = []
+                        achieved: set[str] = set()
                         try:
                             trajs = r.get("trajectories") or []
                             final_obs = (
@@ -455,9 +549,29 @@ async def main() -> None:
                                 else None
                             )
                             if isinstance(ach_map, dict):
-                                ach = sorted([k for k, v in ach_map.items() if v])
+                                achieved.update(k for k, v in ach_map.items() if v)
+                        except Exception:
+                            pass
+                        try:
+                            step_seen = stepwise_details.get("unique_achievements")
+                        except Exception:
+                            step_seen = None
+                        if isinstance(step_seen, (list, tuple, set)):
+                            achieved.update(str(a) for a in step_seen)
+                        else:
+                            try:
+                                alt_seen = stepwise_details.get("achievements_seen")
+                                if isinstance(alt_seen, (list, tuple, set)):
+                                    achieved.update(str(a) for a in alt_seen)
+                            except Exception:
+                                pass
+                        try:
+                            summary_final = stepwise_details.get("final_achievements")
+                            if isinstance(summary_final, (list, tuple, set)):
+                                achieved.update(str(a) for a in summary_final)
                         except Exception:
                             pass
+                        ach = sorted(achieved)
                         length = 0
                         try:
                             trajs = r.get("trajectories") or []
@@ -465,9 +579,22 @@ async def main() -> None:
                                 length = int(trajs[0].get("length") or 0)
                         except Exception:
                             pass
-                        return {"seed": seed, "turns": length, "achievements": ach}
+                        return {
+                            "seed": seed,
+                            "turns": length,
+                            "achievements": ach,
+                            "mean_return": mean_return,
+                            "stepwise": stepwise_details,
+                        }
                     except Exception as e:
-                        return {"seed": seed, "turns": 0, "achievements": [], "error": str(e)}
+                        return {
+                            "seed": seed,
+                            "turns": 0,
+                            "achievements": [],
+                            "mean_return": None,
+                            "stepwise": {},
+                            "error": str(e),
+                        }
             results = await asyncio.gather(
                 *[asyncio.create_task(_run(i)) for i in range(1, NUM_EPISODES + 1)],
@@ -483,17 +610,116 @@ async def main() -> None:
                         all_ach[a] += 1
                 except Exception:
                     pass
+            mean_returns: list[float] = []
+            stepwise_reward_sums: list[float] = []
+            stepwise_indicator_sums: list[float] = []
+            stepwise_new_ach_totals: list[float] = []
+            stepwise_resource_rewards: list[float] = []
+            strategies_seen = Counter()
+            unique_union: set[str] = set()
+            final_union: set[str] = set()
+            for r in results:
+                if not isinstance(r, dict):
+                    continue
+                with contextlib.suppress(Exception):
+                    mean_val = r.get("mean_return")
+                    if mean_val is not None:
+                        mean_returns.append(float(mean_val))
+                stepwise_block = r.get("stepwise")
+                if isinstance(stepwise_block, dict) and stepwise_block:
+                    with contextlib.suppress(Exception):
+                        if stepwise_block.get("reward_sum") is not None:
+                            stepwise_reward_sums.append(float(stepwise_block.get("reward_sum")))
+                    with contextlib.suppress(Exception):
+                        if stepwise_block.get("indicator_sum") is not None:
+                            stepwise_indicator_sums.append(float(stepwise_block.get("indicator_sum")))
+                    with contextlib.suppress(Exception):
+                        if stepwise_block.get("new_achievements_total") is not None:
+                            stepwise_new_ach_totals.append(
+                                float(stepwise_block.get("new_achievements_total"))
+                            )
+                    with contextlib.suppress(Exception):
+                        if stepwise_block.get("resource_reward") is not None:
+                            stepwise_resource_rewards.append(
+                                float(stepwise_block.get("resource_reward"))
+                            )
+                    with contextlib.suppress(Exception):
+                        uniq = stepwise_block.get("unique_achievements") or []
+                        if isinstance(uniq, (list, tuple, set)):
+                            unique_union.update(str(v) for v in uniq)
+                    with contextlib.suppress(Exception):
+                        final = stepwise_block.get("final_achievements") or []
+                        if isinstance(final, (list, tuple, set)):
+                            final_union.update(str(v) for v in final)
+                    strategy_name = stepwise_block.get("strategy")
+                    if isinstance(strategy_name, str) and strategy_name:
+                        strategies_seen[strategy_name] += 1
+            aggregate: dict[str, Any] = {
+                "completed": sum(
+                    1 for r in results if isinstance(r, dict) and not r.get("error")
+                ),
+                "total": len(results),
+                "avg_turns": (sum(turns) / len(turns)) if turns else 0.0,
+                "avg_achievements": (sum(counts) / len(counts)) if counts else 0.0,
+                "achievements_freq": dict(all_ach),
+            }
+            if mean_returns:
+                aggregate["avg_mean_return"] = sum(mean_returns) / len(mean_returns)
+            if stepwise_reward_sums:
+                aggregate["avg_stepwise_reward_sum"] = sum(stepwise_reward_sums) / len(
+                    stepwise_reward_sums
+                )
+            if stepwise_indicator_sums:
+                aggregate["avg_stepwise_indicator_sum"] = sum(stepwise_indicator_sums) / len(
+                    stepwise_indicator_sums
+                )
+            if stepwise_new_ach_totals:
+                aggregate["avg_stepwise_new_achievements"] = sum(stepwise_new_ach_totals) / len(
+                    stepwise_new_ach_totals
+                )
+            if stepwise_resource_rewards:
+                aggregate["avg_stepwise_resource_reward"] = (
+                    sum(stepwise_resource_rewards) / len(stepwise_resource_rewards)
+                )
+            if strategies_seen:
+                aggregate["stepwise_strategies"] = dict(strategies_seen)
+            aggregate["stepwise_samples"] = max(
+                len(stepwise_reward_sums),
+                len(stepwise_indicator_sums),
+                len(stepwise_new_ach_totals),
+                len(stepwise_resource_rewards),
+            ) if any(
+                (
+                    stepwise_reward_sums,
+                    stepwise_indicator_sums,
+                    stepwise_new_ach_totals,
+                    stepwise_resource_rewards,
+                )
+            ) else 0
+            if not unique_union:
+                for r in results:
+                    try:
+                        for a in r.get("achievements") or []:
+                            unique_union.add(str(a))
+                    except Exception:
+                        continue
+            if not final_union:
+                final_union.update(unique_union)
+            if unique_union:
+                aggregate["unique_achievements_union"] = sorted(unique_union)
+            if final_union:
+                aggregate["final_achievements_union"] = sorted(final_union)
             summary = {
                 "episodes": results,
-                "aggregate": {
-                    "completed": sum(1 for r in results if not r.get("error")),
-                    "total": len(results),
-                    "avg_turns": (sum(turns) / len(turns)) if turns else 0.0,
-                    "avg_achievements": (sum(counts) / len(counts)) if counts else 0.0,
-                    "achievements_freq": dict(all_ach),
-                },
+                "aggregate": aggregate,
             }
             print(json.dumps(summary, indent=2))
+            # Failure guardrails: any error or zero-turn episodes across the board
+            any_errors = any(isinstance(r, dict) and r.get("error") for r in results)
+            all_zero_turns = all((int(r.get("turns") or 0) == 0) for r in results if isinstance(r, dict))
+            if any_errors or all_zero_turns:
+                # Exit non-zero so automation/CI treats this as a failure
+                sys.exit(2)
         else:
             async def _run(seed: int):

examples/warming_up_to_rl/task_app/grpo_crafter.py CHANGED Viewed

@@ -93,6 +93,7 @@ TASK_APP_ROOT = _resolve_task_app_root(REPO_ROOT)
 SYNTH_ENVS_HOSTED_ROOT = (TASK_APP_ROOT / "synth_envs_hosted").resolve()
 EXAMPLES_ROOT = (REPO_ROOT / "examples").resolve()
+RUBRICS_ROOT = (EXAMPLES_ROOT / "multi_step" / "rubrics").resolve()
 for path in (REPO_ROOT, TASK_APP_ROOT, SYNTH_ENVS_HOSTED_ROOT, EXAMPLES_ROOT):
     try:
@@ -344,40 +345,9 @@ def _base_task_info(dataset: CrafterDataset) -> TaskInfo:
     )
-OUTCOME_RUBRIC = load_rubric(
-    {
-        "version": "1",
-        "goal_text": "Reward unlocking Crafter achievements and survival.",
-        "aggregation": "weighted_sum",
-        "criteria": [
-            {
-                "id": "achievements",
-                "description": "Unlock achievements or crafting milestones.",
-                "weight": 1.0,
-            },
-            {
-                "id": "survival",
-                "description": "Maintain health, food, and drink levels.",
-                "weight": 1.0,
-            },
-        ],
-    }
-)
+OUTCOME_RUBRIC = load_rubric(str(RUBRICS_ROOT / "crafter_outcome_rubric.json"))
-EVENTS_RUBRIC = load_rubric(
-    {
-        "version": "1",
-        "goal_text": "Encourage purposeful step-wise exploration and crafting.",
-        "aggregation": "weighted_sum",
-        "criteria": [
-            {
-                "id": "progress_steps",
-                "description": "Actions progress quests, crafting, or exploration.",
-                "weight": 1.0,
-            }
-        ],
-    }
-)
+EVENTS_RUBRIC = load_rubric(str(RUBRICS_ROOT / "crafter_events_rubric.json"))
 def describe_taskset(dataset: CrafterDataset) -> dict[str, Any]:

synth-ai 0.2.10__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.10__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl