PyPI - synth-ai - Versions diffs - 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl - Mend

synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of synth-ai might be problematic. Click here for more details.

Files changed (48)

examples/agora_ex/README_MoE.md +224 -0
examples/agora_ex/__init__.py +7 -0
examples/agora_ex/agora_ex.py +65 -0
examples/agora_ex/agora_ex_task_app.py +590 -0
examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
examples/agora_ex/reward_fn_grpo-human.py +129 -0
examples/agora_ex/system_prompt_CURRENT.md +63 -0
examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
examples/multi_step/crafter_rl_lora.md +51 -10
examples/multi_step/sse_metrics_streaming_notes.md +357 -0
examples/multi_step/task_app_config_notes.md +7 -1
examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
examples/warming_up_to_rl/run_eval.py +127 -18
examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +232 -193
synth_ai/__init__.py +41 -1
synth_ai/api/train/builders.py +49 -19
synth_ai/api/train/configs/__init__.py +44 -0
synth_ai/api/train/configs/rl.py +133 -0
synth_ai/api/train/configs/sft.py +94 -0
synth_ai/api/train/configs/shared.py +24 -0
synth_ai/cli/demo.py +38 -39
synth_ai/cli/rl_demo.py +81 -102
synth_ai/cli/task_apps.py +3 -0
synth_ai/demos/core/cli.py +121 -159
synth_ai/environments/examples/crafter_classic/environment.py +16 -0
synth_ai/evals/__init__.py +15 -0
synth_ai/evals/client.py +85 -0
synth_ai/evals/types.py +42 -0
synth_ai/judge_schemas.py +127 -0
synth_ai/rubrics/__init__.py +22 -0
synth_ai/rubrics/validators.py +126 -0
synth_ai/tracing_v3/serialization.py +130 -0
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +1 -1
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +48 -22
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
{synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0

synth_ai/api/train/builders.py CHANGED Viewed

@@ -6,6 +6,7 @@ from pathlib import Path
 from typing import Any, cast
 import click
+from pydantic import ValidationError
 try:
     _models_module = importlib.import_module("synth_ai.api.models.supported")
@@ -25,7 +26,8 @@ from .supported_algos import (
     ensure_model_supported_for_algorithm,
     validate_algorithm_config,
 )
-from .utils import TrainError, ensure_api_base, load_toml
+from .utils import TrainError, ensure_api_base
+from .configs import RLConfig, SFTConfig
 @dataclass(slots=True)
@@ -42,6 +44,16 @@ class SFTBuildResult:
     validation_file: Path | None
+def _format_validation_error(path: Path, exc: ValidationError) -> str:
+    lines: list[str] = []
+    for error in exc.errors():
+        loc = ".".join(str(part) for part in error.get("loc", ()))
+        msg = error.get("msg", "invalid value")
+        lines.append(f"{loc or '<root>'}: {msg}")
+    details = "\n".join(f"  - {line}" for line in lines) or "  - Invalid configuration"
+    return f"Config validation failed ({path}):\n{details}"
 def build_rl_payload(
     *,
     config_path: Path,
@@ -50,13 +62,30 @@ def build_rl_payload(
     idempotency: str | None,
     allow_experimental: bool | None = None,
 ) -> RLBuildResult:
-    data = load_toml(config_path)
     try:
-        spec = validate_algorithm_config(data.get("algorithm"), expected_family="rl")
+        rl_cfg = RLConfig.from_path(config_path)
+    except ValidationError as exc:
+        raise click.ClickException(_format_validation_error(config_path, exc)) from exc
+    data = rl_cfg.to_dict()
+    # Ensure required [reference] section for backend validators
+    try:
+        ref_cfg = data.get("reference") if isinstance(data, dict) else None
+        if not isinstance(ref_cfg, dict):
+            data["reference"] = {"placement": "none"}
+        else:
+            ref_cfg.setdefault("placement", "none")
+    except Exception:
+        # Defensive: never fail builder due to optional defaults
+        data["reference"] = {"placement": "none"}
+    try:
+        spec = validate_algorithm_config(
+            rl_cfg.algorithm.model_dump(), expected_family="rl"
+        )
     except AlgorithmValidationError as exc:
         raise click.ClickException(str(exc)) from exc
     services = data.get("services") if isinstance(data.get("services"), dict) else {}
-    model_cfg = data.get("model") if isinstance(data.get("model"), dict) else {}
+    model_cfg = rl_cfg.model
     final_task_url = (
         overrides.get("task_url")
@@ -69,10 +98,8 @@ def build_rl_payload(
             "Task app URL required (provide --task-url or set services.task_url in TOML)"
         )
-    raw_source = model_cfg.get("source") if isinstance(model_cfg, dict) else ""
-    model_source = str(raw_source or "").strip()
-    raw_base = model_cfg.get("base") if isinstance(model_cfg, dict) else ""
-    model_base = str(raw_base or "").strip()
+    model_source = (model_cfg.source or "").strip()
+    model_base = (model_cfg.base or "").strip()
     override_model = (overrides.get("model") or "").strip()
     if override_model:
         model_source = override_model
@@ -160,22 +187,23 @@ def build_sft_payload(
     dataset_override: Path | None,
     allow_experimental: bool | None,
 ) -> SFTBuildResult:
-    data = load_toml(config_path)
     try:
-        spec = validate_algorithm_config(data.get("algorithm"), expected_family="sft")
+        sft_cfg = SFTConfig.from_path(config_path)
+    except ValidationError as exc:
+        raise TrainError(_format_validation_error(config_path, exc)) from exc
+    data = sft_cfg.to_dict()
+    try:
+        algo_mapping = sft_cfg.algorithm.model_dump() if sft_cfg.algorithm else None
+        spec = validate_algorithm_config(algo_mapping, expected_family="sft")
     except AlgorithmValidationError as exc:
         raise TrainError(str(exc)) from exc
-    job_cfg = data.get("job") if isinstance(data.get("job"), dict) else {}
     data_cfg = data.get("data") if isinstance(data.get("data"), dict) else {}
     hp_cfg = data.get("hyperparameters") if isinstance(data.get("hyperparameters"), dict) else {}
     train_cfg = data.get("training") if isinstance(data.get("training"), dict) else {}
     compute_cfg = data.get("compute") if isinstance(data.get("compute"), dict) else {}
-    raw_dataset = (
-        dataset_override
-        or (job_cfg.get("data") if isinstance(job_cfg, dict) else None)
-        or (job_cfg.get("data_path") if isinstance(job_cfg, dict) else None)
-    )
+    raw_dataset = dataset_override or sft_cfg.job.data or sft_cfg.job.data_path
     if not raw_dataset:
         raise TrainError("Dataset not specified; pass --dataset or set [job].data")
     dataset_path = Path(raw_dataset)
@@ -260,9 +288,11 @@ def build_sft_payload(
             "enabled": bool(validation_cfg.get("enabled", True))
         }
-    raw_model = str(
-        job_cfg.get("model") if isinstance(job_cfg, dict) else None or data.get("model") or ""
-    ).strip()
+    raw_model = (sft_cfg.job.model or "").strip()
+    if not raw_model:
+        model_block = data.get("model")
+        if isinstance(model_block, str):
+            raw_model = model_block.strip()
     if not raw_model:
         raise TrainError("Model not specified; set [job].model or [model].base in the config")

synth_ai/api/train/configs/__init__.py ADDED Viewed

@@ -0,0 +1,44 @@
+"""Typed training config loaders for RL and SFT jobs."""
+from .shared import AlgorithmConfig, ComputeConfig
+from .sft import (
+    HyperparametersConfig,
+    HyperparametersParallelism,
+    JobConfig,
+    SFTConfig,
+    SFTDataConfig,
+    TrainingConfig,
+    TrainingValidationConfig,
+)
+from .rl import (
+    EvaluationConfig,
+    JudgeConfig,
+    JudgeOptionsConfig,
+    ModelConfig,
+    RLConfig,
+    RLServicesConfig,
+    RLTrainingConfig,
+    RolloutConfig,
+    WeightSyncConfig,
+)
+__all__ = [
+    "AlgorithmConfig",
+    "ComputeConfig",
+    "EvaluationConfig",
+    "HyperparametersConfig",
+    "HyperparametersParallelism",
+    "JobConfig",
+    "JudgeConfig",
+    "JudgeOptionsConfig",
+    "ModelConfig",
+    "RLConfig",
+    "RLServicesConfig",
+    "RLTrainingConfig",
+    "RolloutConfig",
+    "SFTConfig",
+    "SFTDataConfig",
+    "TrainingConfig",
+    "TrainingValidationConfig",
+    "WeightSyncConfig",
+]

synth_ai/api/train/configs/rl.py ADDED Viewed

@@ -0,0 +1,133 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, Mapping
+from pydantic import model_validator
+from ..utils import load_toml
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+class RLServicesConfig(ExtraModel):
+    task_url: str
+    judge_url: str | None = None
+class ModelConfig(ExtraModel):
+    source: str | None = None
+    base: str | None = None
+    trainer_mode: str
+    label: str
+    @model_validator(mode="after")
+    def _ensure_exactly_one_source_or_base(self) -> "ModelConfig":
+        if bool(self.source) == bool(self.base):
+            raise ValueError("Config must set exactly one of [model].source or [model].base")
+        return self
+class RolloutConfig(ExtraModel):
+    env_name: str
+    policy_name: str
+    env_config: dict[str, Any] | None = None
+    policy_config: dict[str, Any] | None = None
+    max_turns: int
+    episodes_per_batch: int
+    max_concurrent_rollouts: int
+    batches_per_step: int | None = None
+    ops: list[str] | None = None
+class WeightSyncConfig(ExtraModel):
+    enable: bool | None = None
+    targets: list[str] | None = None
+    mode: str | None = None
+    direct: bool | None = None
+    verify_every_k: int | None = None
+class RLTrainingConfig(ExtraModel):
+    num_epochs: int
+    iterations_per_epoch: int
+    gradient_accumulation_steps: int | None = None
+    max_accumulated_minibatch: int | None = None
+    max_turns: int
+    batch_size: int
+    group_size: int
+    learning_rate: float
+    log_interval: int | None = None
+    weight_sync_interval: int | None = None
+    step_rewards_enabled: bool | None = None
+    step_rewards_mode: str | None = None
+    step_rewards_indicator_lambda: float | None = None
+    step_rewards_beta: float | None = None
+    step_rewards_strategy: str | None = None
+    event_rewards_kind: str | None = None
+    weight_sync: WeightSyncConfig | None = None
+class EvaluationConfig(ExtraModel):
+    instances: int
+    every_n_iters: int
+    seeds: list[int]
+class JudgeOptionsConfig(ExtraModel):
+    event: bool | None = None
+    outcome: bool | None = None
+    provider: str | None = None
+    model: str | None = None
+    rubric_id: str | None = None
+    rubric_overrides: dict[str, Any] | None = None
+    tracks: list[str] | None = None
+    weights: dict[str, float] | None = None
+    max_concurrency: int | None = None
+class JudgeConfig(ExtraModel):
+    type: str | None = None
+    timeout_s: int | None = None
+    options: JudgeOptionsConfig | None = None
+class RLConfig(ExtraModel):
+    algorithm: AlgorithmConfig
+    services: RLServicesConfig
+    compute: ComputeConfig | None = None
+    topology: dict[str, Any] | None = None
+    vllm: dict[str, Any] | None = None
+    reference: dict[str, Any] | None = None
+    model: ModelConfig
+    lora: dict[str, Any] | None = None
+    rollout: RolloutConfig | None = None
+    evaluation: EvaluationConfig | None = None
+    training: RLTrainingConfig | None = None
+    rubric: dict[str, Any] | None = None
+    judge: JudgeConfig | None = None
+    tags: dict[str, Any] | None = None
+    def to_dict(self) -> dict[str, Any]:
+        return self.model_dump(mode="python", exclude_none=True)
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any]) -> "RLConfig":
+        return cls.model_validate(dict(data))
+    @classmethod
+    def from_path(cls, path: Path) -> "RLConfig":
+        content = load_toml(path)
+        return cls.from_mapping(content)
+__all__ = [
+    "EvaluationConfig",
+    "JudgeConfig",
+    "JudgeOptionsConfig",
+    "ModelConfig",
+    "RLConfig",
+    "RLServicesConfig",
+    "RLTrainingConfig",
+    "RolloutConfig",
+    "WeightSyncConfig",
+]

synth_ai/api/train/configs/sft.py ADDED Viewed

@@ -0,0 +1,94 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Any, Mapping
+from pydantic import Field
+from ..utils import load_toml
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+class JobConfig(ExtraModel):
+    model: str
+    data: str | None = None
+    data_path: str | None = None
+    poll_seconds: int | None = None
+class SFTDataConfig(ExtraModel):
+    topology: dict[str, Any] | None = None
+    validation_path: str | None = None
+class TrainingValidationConfig(ExtraModel):
+    enabled: bool | None = None
+    evaluation_strategy: str | None = None
+    eval_steps: int | None = None
+    save_best_model_at_end: bool | None = None
+    metric_for_best_model: str | None = None
+    greater_is_better: bool | None = None
+class TrainingConfig(ExtraModel):
+    mode: str | None = None
+    use_qlora: bool | None = None
+    validation: TrainingValidationConfig | None = None
+class HyperparametersParallelism(ExtraModel):
+    use_deepspeed: bool | None = None
+    deepspeed_stage: int | None = None
+    fsdp: bool | None = None
+    bf16: bool | None = None
+    fp16: bool | None = None
+    activation_checkpointing: bool | None = None
+    tensor_parallel_size: int | None = None
+    pipeline_parallel_size: int | None = None
+class HyperparametersConfig(ExtraModel):
+    n_epochs: int = 1
+    batch_size: int | None = None
+    global_batch: int | None = None
+    per_device_batch: int | None = None
+    gradient_accumulation_steps: int | None = None
+    sequence_length: int | None = None
+    learning_rate: float | None = None
+    warmup_ratio: float | None = None
+    train_kind: str | None = None
+    weight_decay: float | None = None
+    parallelism: HyperparametersParallelism | None = None
+class SFTConfig(ExtraModel):
+    algorithm: AlgorithmConfig | None = None
+    job: JobConfig
+    compute: ComputeConfig | None = None
+    data: SFTDataConfig | None = None
+    training: TrainingConfig | None = None
+    hyperparameters: HyperparametersConfig = Field(default_factory=HyperparametersConfig)
+    tags: dict[str, Any] | None = None
+    def to_dict(self) -> dict[str, Any]:
+        return self.model_dump(mode="python", exclude_none=True)
+    @classmethod
+    def from_mapping(cls, data: Mapping[str, Any]) -> "SFTConfig":
+        return cls.model_validate(dict(data))
+    @classmethod
+    def from_path(cls, path: Path) -> "SFTConfig":
+        content = load_toml(path)
+        return cls.from_mapping(content)
+__all__ = [
+    "HyperparametersConfig",
+    "HyperparametersParallelism",
+    "JobConfig",
+    "SFTConfig",
+    "SFTDataConfig",
+    "TrainingConfig",
+    "TrainingValidationConfig",
+]

synth_ai/api/train/configs/shared.py ADDED Viewed

@@ -0,0 +1,24 @@
+from __future__ import annotations
+from pydantic import BaseModel, ConfigDict
+class ExtraModel(BaseModel):
+    """Base model that tolerates unknown keys so configs keep forward compatibility."""
+    model_config = ConfigDict(extra="allow")
+class AlgorithmConfig(ExtraModel):
+    type: str
+    method: str
+    variety: str
+class ComputeConfig(ExtraModel):
+    gpu_type: str
+    gpu_count: int
+    nodes: int | None = None
+__all__ = ["ExtraModel", "AlgorithmConfig", "ComputeConfig"]

synth_ai/cli/demo.py CHANGED Viewed

@@ -1,9 +1,9 @@
 #!/usr/bin/env python3
 """
-CLI: interactive launcher for example demos and forwarders for new RL demo.
+CLI: interactive launcher for example demos and RL demo helpers.
-- `synth-ai demo` (no subcommand) -> legacy examples/ runner (run_demo.sh picker)
-- `synth-ai demo deploy|configure|run` -> forwards to synth_ai.demos.core.cli
+- `synth-ai demo` (no subcommand) -> initialize RL demo files into ./synth_demo/
+- `synth-ai demo deploy|configure|run` -> invoke RL demo helpers directly.
 """
 from __future__ import annotations
@@ -14,6 +14,8 @@ from pathlib import Path
 import click
+from synth_ai.demos.core import cli as demo_commands
 def _find_demo_scripts(root: Path) -> list[Path]:
     if not root.exists():
@@ -21,17 +23,23 @@ def _find_demo_scripts(root: Path) -> list[Path]:
     return sorted([p for p in root.rglob("run_demo.sh") if p.is_file()])
-def _forward_to_new(args: list[str]) -> None:
-    import sys
+def _run_demo_command(func, *args, **kwargs) -> None:
+    """Invoke a demo command and exit via Click on non-zero status codes."""
+    try:
+        result = func(*args, **kwargs)
+    except SystemExit as exc:  # pragma: no cover - defensive
+        raise click.exceptions.Exit(exc.code or 1) from exc
+    if result is None:
+        return
     try:
-        from synth_ai.demos.core import cli as demo_cli  # type: ignore
-    except Exception as e:  # pragma: no cover
-        click.echo(f"Failed to import demo CLI: {e}")
-        sys.exit(1)
-    rc = int(demo_cli.main(args) or 0)
-    if rc != 0:
-        sys.exit(rc)
+        code = int(result)
+    except (TypeError, ValueError):
+        return
+    if code != 0:
+        raise click.exceptions.Exit(code)
 def register(cli):
@@ -92,11 +100,8 @@ def register(cli):
                 click.echo("\n🛑 Demo interrupted by user")
             return
-        # Default: forward to RL demo init behavior, optionally with --force
-        args: list[str] = ["rl_demo.init"]
-        if force:
-            args.append("--force")
-        _forward_to_new(args)
+        # Default: initialize RL demo files via new command
+        _run_demo_command(demo_commands.init, force=force)
     # (prepare command removed; configure now prepares baseline TOML)
@@ -122,24 +127,21 @@ def register(cli):
         help="Path to deploy_task_app.sh (optional legacy)",
     )
     def demo_deploy(local: bool, app: str | None, name: str, script: str | None):
-        args: list[str] = ["rl_demo.deploy"]
-        if local:
-            args.append("--local")
-        if app:
-            args.extend(["--app", app])
-        if name:
-            args.extend(["--name", name])
-        if script:
-            args.extend(["--script", script])
-        _forward_to_new(args)
+        _run_demo_command(
+            demo_commands.deploy,
+            local=local,
+            app=app,
+            name=name,
+            script=script,
+        )
     @_dg.command("configure")
     def demo_configure():
-        _forward_to_new(["rl_demo.configure"])
+        _run_demo_command(demo_commands.run)
     @_dg.command("setup")
     def demo_setup():
-        _forward_to_new(["rl_demo.setup"])
+        _run_demo_command(demo_commands.setup)
     @_dg.command("run")
     @click.option("--batch-size", type=int, default=None)
@@ -147,13 +149,10 @@ def register(cli):
     @click.option("--model", type=str, default=None)
     @click.option("--timeout", type=int, default=600)
     def demo_run(batch_size: int | None, group_size: int | None, model: str | None, timeout: int):
-        args = ["rl_demo.run"]
-        if batch_size is not None:
-            args.extend(["--batch-size", str(batch_size)])
-        if group_size is not None:
-            args.extend(["--group-size", str(group_size)])
-        if model:
-            args.extend(["--model", model])
-        if timeout:
-            args.extend(["--timeout", str(timeout)])
-        _forward_to_new(args)
+        _run_demo_command(
+            demo_commands.run,
+            batch_size=batch_size,
+            group_size=group_size,
+            model=model,
+            timeout=timeout,
+        )

synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl

Potentially problematic release.

synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl