synth-ai 0.4.1__py3-none-any.whl → 0.4.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of synth-ai has been flagged by the registry.

Files changed (153)
  1. synth_ai/__init__.py +13 -13
  2. synth_ai/cli/__init__.py +6 -15
  3. synth_ai/cli/commands/eval/__init__.py +6 -15
  4. synth_ai/cli/commands/eval/config.py +338 -0
  5. synth_ai/cli/commands/eval/core.py +236 -1091
  6. synth_ai/cli/commands/eval/runner.py +704 -0
  7. synth_ai/cli/commands/eval/validation.py +44 -117
  8. synth_ai/cli/commands/filter/core.py +7 -7
  9. synth_ai/cli/commands/filter/validation.py +2 -2
  10. synth_ai/cli/commands/smoke/core.py +7 -17
  11. synth_ai/cli/commands/status/__init__.py +1 -64
  12. synth_ai/cli/commands/status/client.py +50 -151
  13. synth_ai/cli/commands/status/config.py +3 -83
  14. synth_ai/cli/commands/status/errors.py +4 -13
  15. synth_ai/cli/commands/status/subcommands/__init__.py +2 -8
  16. synth_ai/cli/commands/status/subcommands/config.py +13 -0
  17. synth_ai/cli/commands/status/subcommands/files.py +18 -63
  18. synth_ai/cli/commands/status/subcommands/jobs.py +28 -311
  19. synth_ai/cli/commands/status/subcommands/models.py +18 -62
  20. synth_ai/cli/commands/status/subcommands/runs.py +16 -63
  21. synth_ai/cli/commands/status/subcommands/session.py +67 -172
  22. synth_ai/cli/commands/status/subcommands/summary.py +24 -32
  23. synth_ai/cli/commands/status/subcommands/utils.py +41 -0
  24. synth_ai/cli/commands/status/utils.py +16 -107
  25. synth_ai/cli/commands/train/__init__.py +18 -20
  26. synth_ai/cli/commands/train/errors.py +3 -3
  27. synth_ai/cli/commands/train/prompt_learning_validation.py +15 -16
  28. synth_ai/cli/commands/train/validation.py +7 -7
  29. synth_ai/cli/commands/train/{judge_schemas.py → verifier_schemas.py} +33 -34
  30. synth_ai/cli/commands/train/verifier_validation.py +235 -0
  31. synth_ai/cli/demo_apps/demo_task_apps/math/config.toml +0 -1
  32. synth_ai/cli/demo_apps/demo_task_apps/math/modal_task_app.py +2 -6
  33. synth_ai/cli/demo_apps/math/config.toml +0 -1
  34. synth_ai/cli/demo_apps/math/modal_task_app.py +2 -6
  35. synth_ai/cli/demo_apps/mipro/task_app.py +25 -47
  36. synth_ai/cli/lib/apps/task_app.py +12 -13
  37. synth_ai/cli/lib/task_app_discovery.py +6 -6
  38. synth_ai/cli/lib/train_cfgs.py +10 -10
  39. synth_ai/cli/task_apps/__init__.py +11 -0
  40. synth_ai/cli/task_apps/commands.py +7 -15
  41. synth_ai/core/env.py +12 -1
  42. synth_ai/core/errors.py +1 -2
  43. synth_ai/core/integrations/cloudflare.py +209 -33
  44. synth_ai/core/tracing_v3/abstractions.py +46 -0
  45. synth_ai/data/__init__.py +3 -30
  46. synth_ai/data/enums.py +1 -20
  47. synth_ai/data/rewards.py +100 -3
  48. synth_ai/products/graph_evolve/__init__.py +1 -2
  49. synth_ai/products/graph_evolve/config.py +16 -16
  50. synth_ai/products/graph_evolve/converters/__init__.py +3 -3
  51. synth_ai/products/graph_evolve/converters/openai_sft.py +7 -7
  52. synth_ai/products/graph_evolve/examples/hotpotqa/config.toml +1 -1
  53. synth_ai/products/graph_gepa/__init__.py +23 -0
  54. synth_ai/products/graph_gepa/converters/__init__.py +19 -0
  55. synth_ai/products/graph_gepa/converters/openai_sft.py +29 -0
  56. synth_ai/sdk/__init__.py +45 -35
  57. synth_ai/sdk/api/eval/__init__.py +33 -0
  58. synth_ai/sdk/api/eval/job.py +732 -0
  59. synth_ai/sdk/api/research_agent/__init__.py +276 -66
  60. synth_ai/sdk/api/train/builders.py +181 -0
  61. synth_ai/sdk/api/train/cli.py +41 -33
  62. synth_ai/sdk/api/train/configs/__init__.py +6 -4
  63. synth_ai/sdk/api/train/configs/prompt_learning.py +127 -33
  64. synth_ai/sdk/api/train/configs/rl.py +264 -16
  65. synth_ai/sdk/api/train/configs/sft.py +165 -1
  66. synth_ai/sdk/api/train/graph_validators.py +12 -12
  67. synth_ai/sdk/api/train/graphgen.py +169 -51
  68. synth_ai/sdk/api/train/graphgen_models.py +95 -45
  69. synth_ai/sdk/api/train/local_api.py +10 -0
  70. synth_ai/sdk/api/train/pollers.py +36 -0
  71. synth_ai/sdk/api/train/prompt_learning.py +390 -60
  72. synth_ai/sdk/api/train/rl.py +41 -5
  73. synth_ai/sdk/api/train/sft.py +2 -0
  74. synth_ai/sdk/api/train/task_app.py +20 -0
  75. synth_ai/sdk/api/train/validators.py +17 -17
  76. synth_ai/sdk/graphs/completions.py +239 -33
  77. synth_ai/sdk/{judging/schemas.py → graphs/verifier_schemas.py} +23 -23
  78. synth_ai/sdk/learning/__init__.py +35 -5
  79. synth_ai/sdk/learning/context_learning_client.py +531 -0
  80. synth_ai/sdk/learning/context_learning_types.py +294 -0
  81. synth_ai/sdk/learning/prompt_learning_client.py +1 -1
  82. synth_ai/sdk/learning/prompt_learning_types.py +2 -1
  83. synth_ai/sdk/learning/rl/__init__.py +0 -4
  84. synth_ai/sdk/learning/rl/contracts.py +0 -4
  85. synth_ai/sdk/localapi/__init__.py +40 -0
  86. synth_ai/sdk/localapi/apps/__init__.py +28 -0
  87. synth_ai/sdk/localapi/client.py +10 -0
  88. synth_ai/sdk/localapi/contracts.py +10 -0
  89. synth_ai/sdk/localapi/helpers.py +519 -0
  90. synth_ai/sdk/localapi/rollouts.py +93 -0
  91. synth_ai/sdk/localapi/server.py +29 -0
  92. synth_ai/sdk/localapi/template.py +49 -0
  93. synth_ai/sdk/streaming/handlers.py +6 -6
  94. synth_ai/sdk/streaming/streamer.py +10 -6
  95. synth_ai/sdk/task/__init__.py +18 -5
  96. synth_ai/sdk/task/apps/__init__.py +37 -1
  97. synth_ai/sdk/task/client.py +9 -1
  98. synth_ai/sdk/task/config.py +6 -11
  99. synth_ai/sdk/task/contracts.py +137 -95
  100. synth_ai/sdk/task/in_process.py +32 -22
  101. synth_ai/sdk/task/in_process_runner.py +9 -4
  102. synth_ai/sdk/task/rubrics/__init__.py +2 -3
  103. synth_ai/sdk/task/rubrics/loaders.py +4 -4
  104. synth_ai/sdk/task/rubrics/strict.py +3 -4
  105. synth_ai/sdk/task/server.py +76 -16
  106. synth_ai/sdk/task/trace_correlation_helpers.py +190 -139
  107. synth_ai/sdk/task/validators.py +34 -49
  108. synth_ai/sdk/training/__init__.py +7 -16
  109. synth_ai/sdk/tunnels/__init__.py +118 -0
  110. synth_ai/sdk/tunnels/cleanup.py +83 -0
  111. synth_ai/sdk/tunnels/ports.py +120 -0
  112. synth_ai/sdk/tunnels/tunneled_api.py +363 -0
  113. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/METADATA +71 -4
  114. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/RECORD +118 -128
  115. synth_ai/cli/commands/baseline/__init__.py +0 -12
  116. synth_ai/cli/commands/baseline/core.py +0 -636
  117. synth_ai/cli/commands/baseline/list.py +0 -94
  118. synth_ai/cli/commands/eval/errors.py +0 -81
  119. synth_ai/cli/commands/status/formatters.py +0 -164
  120. synth_ai/cli/commands/status/subcommands/pricing.py +0 -23
  121. synth_ai/cli/commands/status/subcommands/usage.py +0 -203
  122. synth_ai/cli/commands/train/judge_validation.py +0 -305
  123. synth_ai/cli/usage.py +0 -159
  124. synth_ai/data/specs.py +0 -36
  125. synth_ai/sdk/api/research_agent/cli.py +0 -428
  126. synth_ai/sdk/api/research_agent/config.py +0 -357
  127. synth_ai/sdk/api/research_agent/job.py +0 -717
  128. synth_ai/sdk/baseline/__init__.py +0 -25
  129. synth_ai/sdk/baseline/config.py +0 -209
  130. synth_ai/sdk/baseline/discovery.py +0 -216
  131. synth_ai/sdk/baseline/execution.py +0 -154
  132. synth_ai/sdk/judging/__init__.py +0 -15
  133. synth_ai/sdk/judging/base.py +0 -24
  134. synth_ai/sdk/judging/client.py +0 -191
  135. synth_ai/sdk/judging/types.py +0 -42
  136. synth_ai/sdk/research_agent/__init__.py +0 -34
  137. synth_ai/sdk/research_agent/container_builder.py +0 -328
  138. synth_ai/sdk/research_agent/container_spec.py +0 -198
  139. synth_ai/sdk/research_agent/defaults.py +0 -34
  140. synth_ai/sdk/research_agent/results_collector.py +0 -69
  141. synth_ai/sdk/specs/__init__.py +0 -46
  142. synth_ai/sdk/specs/dataclasses.py +0 -149
  143. synth_ai/sdk/specs/loader.py +0 -144
  144. synth_ai/sdk/specs/serializer.py +0 -199
  145. synth_ai/sdk/specs/validation.py +0 -250
  146. synth_ai/sdk/tracing/__init__.py +0 -39
  147. synth_ai/sdk/usage/__init__.py +0 -37
  148. synth_ai/sdk/usage/client.py +0 -171
  149. synth_ai/sdk/usage/models.py +0 -261
  150. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/WHEEL +0 -0
  151. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/entry_points.txt +0 -0
  152. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/licenses/LICENSE +0 -0
  153. {synth_ai-0.4.1.dist-info → synth_ai-0.4.4.dist-info}/top_level.txt +0 -0
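The dominant theme in this range is a rename of "judge" terminology to "verifier" throughout the SDK (`train/judge_schemas.py → train/verifier_schemas.py`, `sdk/judging/schemas.py → sdk/graphs/verifier_schemas.py`), the removal of the `baseline`, `judging`, `specs`, `usage`, and old `research_agent` modules, and new `eval`, `localapi`, and `tunnels` packages. A minimal sketch of what the rename means at a 0.4.1 call site, using only symbols that appear in the diffs below; the dict keys mirror the rl.py hunks and the values are illustrative, not a complete or tested config:

```python
# Before (0.4.1), per the old rl.py shown below:
#   from synth_ai.sdk.api.train.configs.rl import JudgeConfig, JudgeOptionsConfig
#   services = {"task_url": ..., "judge_url": ...}

# After (0.4.4): verifier-flavored names.
from synth_ai.sdk.api.train.configs.rl import RLConfig, VerifierConfig

config = RLConfig.from_mapping({
    "algorithm": {"type": "online", "method": "policy_gradient", "variety": "gspo"},
    "services": {
        "task_url": "https://my-tunnel.trycloudflare.com",
        "verifier_url": None,  # was services.judge_url in 0.4.1
    },
    "model": {"base": "Qwen/Qwen3-4B", "trainer_mode": "lora", "label": "my-model"},
    "verifier": {"type": "synth", "enabled": True},  # was the judge section
})
assert isinstance(config.verifier, VerifierConfig)
```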
synth_ai/sdk/api/train/configs/rl.py
@@ -1,3 +1,42 @@
+"""RL (Reinforcement Learning) configuration models.
+
+This module defines the configuration schema for RL training jobs using GSPO
+(Group Sequence Policy Optimization) or other policy gradient methods.
+
+Example TOML configuration:
+```toml
+[algorithm]
+type = "online"
+method = "policy_gradient"
+variety = "gspo"
+
+[services]
+task_url = "https://your-tunnel.trycloudflare.com"
+
+[model]
+base = "Qwen/Qwen3-4B"
+trainer_mode = "lora"
+label = "my-rl-model"
+
+[rollout]
+env_name = "my-task"
+policy_name = "my-policy"
+max_turns = 10
+episodes_per_batch = 32
+max_concurrent_rollouts = 8
+
+[training]
+num_epochs = 1
+iterations_per_epoch = 20
+batch_size = 16
+group_size = 4
+learning_rate = 5e-5
+```
+
+See Also:
+- Training reference: /training/gspo
+- Job events: /sdk/jobs/rl
+"""
 from __future__ import annotations
 
 from collections.abc import Mapping
@@ -11,11 +50,32 @@ from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, Poli
 
 
 class RLServicesConfig(ExtraModel):
+    """Service URLs for RL training.
+
+    Attributes:
+        task_url: URL of your task app (typically a Cloudflare tunnel URL).
+            Required for rollout execution.
+        verifier_url: Optional URL for verifier service. Defaults to Synth's
+            hosted verifier at https://synth-backend.onrender.com/api.
+    """
     task_url: str
-    judge_url: str | None = None
+    verifier_url: str | None = None
 
 
 class ModelConfig(ExtraModel):
+    """Model configuration for RL training.
+
+    Specify either `base` (for a new model) or `source` (to continue from
+    a checkpoint), but not both.
+
+    Attributes:
+        source: Checkpoint ID to continue training from (e.g., "ft:job_abc123").
+            Mutually exclusive with `base`.
+        base: Base model to fine-tune (e.g., "Qwen/Qwen3-4B").
+            Mutually exclusive with `source`.
+        trainer_mode: Training mode - "lora", "qlora", or "full".
+        label: Human-readable identifier for this model.
+    """
     source: str | None = None
     base: str | None = None
     trainer_mode: str
@@ -29,6 +89,20 @@ class ModelConfig(ExtraModel):
 
 
 class RolloutConfig(ExtraModel):
+    """Rollout configuration for episode collection.
+
+    Controls how episodes are collected from the task app during training.
+
+    Attributes:
+        env_name: Environment/task name registered in your task app.
+        policy_name: Policy identifier for the rollout.
+        env_config: Optional environment-specific configuration dict.
+        policy_config: Optional policy-specific configuration dict.
+        max_turns: Maximum steps per episode before truncation.
+        episodes_per_batch: Number of episodes to collect per training batch.
+        max_concurrent_rollouts: Maximum parallel rollouts to the task app.
+        batches_per_step: Batches to collect per training step. Default: 1.
+    """
     env_name: str
     policy_name: str
     env_config: dict[str, Any] | None = None
@@ -37,10 +111,20 @@ class RolloutConfig(ExtraModel):
     episodes_per_batch: int
     max_concurrent_rollouts: int
     batches_per_step: int | None = None
-    ops: list[str] | None = None
 
 
 class WeightSyncConfig(ExtraModel):
+    """Weight synchronization configuration.
+
+    Controls how model weights are synchronized between training and inference.
+
+    Attributes:
+        enable: Whether to enable weight sync. Default: True.
+        targets: Sync targets, typically ["policy"].
+        mode: Sync mode (advanced).
+        direct: Use direct sync method.
+        verify_every_k: Verify sync every K iterations.
+    """
     enable: bool | None = None
     targets: list[str] | None = None
     mode: str | None = None
@@ -49,7 +133,18 @@ class WeightSyncConfig(ExtraModel):
 
 
 class RewardsConfig(ExtraModel):
-    """Rewards configuration for RL training."""
+    """Rewards configuration for RL training.
+
+    Controls step-level and event-level reward computation.
+
+    Attributes:
+        step_rewards_enabled: Enable step-level rewards. Default: False.
+        step_rewards_mode: Reward mode - "off", "decision_stepwise", or "env_sparse".
+        step_rewards_indicator_lambda: Lambda coefficient for indicator rewards.
+        step_rewards_beta: Beta coefficient for step rewards.
+        step_rewards_strategy: Reward computation strategy.
+        event_rewards_kind: Event reward aggregation - "unique" or "absolute".
+    """
     step_rewards_enabled: bool | None = None
     step_rewards_mode: str | None = None
     step_rewards_indicator_lambda: float | None = None
@@ -59,6 +154,23 @@
 
 
 class RLTrainingConfig(ExtraModel):
+    """Training hyperparameters for RL.
+
+    Attributes:
+        num_epochs: Number of training epochs.
+        iterations_per_epoch: Training iterations per epoch.
+        gradient_accumulation_steps: Steps to accumulate gradients. Default: 1.
+        max_accumulated_minibatch: Maximum accumulated minibatch size.
+        max_turns: Maximum turns during training rollouts.
+        batch_size: Training batch size.
+        group_size: GSPO group size for advantage estimation.
+        learning_rate: Optimizer learning rate (e.g., 5e-5).
+        log_interval: Log metrics every N steps.
+        weight_sync_interval: Sync weights every N steps.
+        weight_sync: Nested weight sync configuration.
+        lora: LoRA configuration (r, alpha, dropout, target_modules).
+        rewards: Nested rewards configuration.
+    """
     num_epochs: int
     iterations_per_epoch: int
     gradient_accumulation_steps: int | None = None
@@ -83,12 +195,32 @@
 
 
 class EvaluationConfig(ExtraModel):
+    """Evaluation configuration during training.
+
+    Attributes:
+        instances: Number of evaluation instances to run.
+        every_n_iters: Run evaluation every N training iterations.
+        seeds: List of seeds for reproducible evaluation.
+    """
     instances: int
     every_n_iters: int
     seeds: list[int]
 
 
-class JudgeOptionsConfig(ExtraModel):
+class VerifierOptionsConfig(ExtraModel):
+    """Verifier scoring options.
+
+    Attributes:
+        event: Enable event-level verification.
+        outcome: Enable outcome-level verification.
+        provider: Verifier provider - "synth" for Synth's hosted verifier.
+        model: Verifier model identifier.
+        rubric_id: Optional rubric identifier.
+        rubric_overrides: Override specific rubric parameters.
+        tracks: Tracks to evaluate.
+        weights: Per-track scoring weights.
+        max_concurrency: Maximum concurrent verifier API calls.
+    """
     event: bool | None = None
     outcome: bool | None = None
     provider: str | None = None
@@ -101,22 +233,61 @@
 
 
 class RubricConfig(ExtraModel):
-    """Rubric configuration for reward blending."""
+    """Rubric configuration for reward blending.
+
+    Attributes:
+        enabled: Enable rubric-based scoring. Default: False.
+        reward_blend: Weights for reward sources - {"env": 1.0, "event": 0.0, "outcome": 0.0}.
+    """
     enabled: bool = False
     reward_blend: dict[str, float] | None = None  # env, event, outcome weights
 
 
-class JudgeConfig(ExtraModel):
+class VerifierConfig(ExtraModel):
+    """Verifier configuration for LLM-based reward scoring.
+
+    Attributes:
+        type: Verifier type - "synth" for Synth's hosted verifier.
+        timeout_s: Timeout in seconds for verifier API calls.
+        enabled: Master switch to enable/disable verifier scoring.
+        reward_blend: Reward source weights - {"env": 1.0, "event": 0.0, "outcome": 0.0}.
+        rubric: Deprecated - use reward_blend instead.
+        options: Detailed verifier options.
+    """
     type: str | None = None
     timeout_s: int | None = None
-    enabled: bool | None = None  # Master switch for judge/rubric
+    enabled: bool | None = None  # Master switch for verifier/rubric
     reward_blend: dict[str, float] | None = None  # NEW: nested reward blending (replaces rubric.weights)
     rubric: RubricConfig | None = None  # DEPRECATED: use flat fields instead
-    options: JudgeOptionsConfig | None = None
+    options: VerifierOptionsConfig | None = None
 
 
 class SmokeConfig(ExtraModel):
-    """Configuration for local smoke testing (CLI only, ignored by trainer)."""
+    """Configuration for local smoke testing (CLI only, ignored by trainer).
+
+    Use this section to configure quick local tests before submitting
+    a full training job.
+
+    Attributes:
+        task_url: Override task app URL for testing.
+        env_name: Environment name to test.
+        policy_name: Policy name to test.
+        max_steps: Maximum steps for smoke test.
+        policy: Policy type - "mock", "gpt-5-nano", "openai", "groq".
+        model: Model identifier for the policy.
+        mock_backend: Mock backend type - "synthetic" or "openai".
+        mock_port: Port for mock backend.
+        return_trace: Include trace in response.
+        use_mock: Use mock policy.
+        task_app_name: Task app to auto-serve (e.g., "grpo-crafter").
+        task_app_port: Port for auto-served task app. Default: 8765.
+        task_app_env_file: Path to .env file for task app.
+        task_app_force: Use --force flag when serving.
+        sqld_auto_start: Auto-start sqld server.
+        sqld_db_path: Database path. Default: ./traces/local.db.
+        sqld_hrana_port: Hrana WebSocket port. Default: 8080.
+        sqld_http_port: HTTP API port. Default: 8081.
+    """
     # Test parameters
     task_url: str | None = None
     env_name: str | None = None
@@ -128,13 +299,13 @@ class SmokeConfig(ExtraModel):
     mock_port: int | None = None
     return_trace: bool | None = None
     use_mock: bool | None = None
-
+
     # Task app auto-start configuration
     task_app_name: str | None = None  # Task app to serve (e.g., "grpo-crafter")
     task_app_port: int | None = None  # Port for task app (default: 8765)
     task_app_env_file: str | None = None  # Path to .env file for task app
     task_app_force: bool | None = None  # Use --force flag when serving
-
+
     # sqld auto-start configuration
     sqld_auto_start: bool | None = None  # Auto-start sqld server
     sqld_db_path: str | None = None  # Database path (default: ./traces/local.db)
@@ -143,6 +314,67 @@
 
 
 class RLConfig(ExtraModel):
+    """Root configuration for RL (Reinforcement Learning) training jobs.
+
+    This is the top-level config loaded from a TOML file. Use `RLConfig.from_path()`
+    to load from a file, or `RLConfig.from_mapping()` to load from a dict.
+
+    Example:
+        ```python
+        from synth_ai.sdk.api.train.configs.rl import RLConfig
+
+        # Load from file
+        config = RLConfig.from_path("rl_config.toml")
+
+        # Or from dict
+        config = RLConfig.from_mapping({
+            "algorithm": {"type": "online", "method": "policy_gradient", "variety": "gspo"},
+            "services": {"task_url": "https://my-tunnel.trycloudflare.com"},
+            "model": {"base": "Qwen/Qwen3-4B", "trainer_mode": "lora", "label": "my-model"},
+            ...
+        })
+        ```
+
+    Attributes:
+        algorithm: Algorithm configuration (type, method, variety).
+        services: Service URLs (task_url, verifier_url).
+        compute: GPU and compute configuration.
+        topology: Deprecated - use compute.topology.
+        vllm: vLLM inference server configuration.
+        reference: Deprecated - use compute.topology.reference_placement.
+        model: Deprecated - use policy instead.
+        policy: Policy/model configuration (preferred).
+        lora: Deprecated - use training.lora.
+        rollout: Rollout/episode collection configuration.
+        evaluation: Evaluation configuration.
+        training: Training hyperparameters.
+        rubric: Deprecated - use verifier.reward_blend.
+        verifier: Verifier/reward configuration.
+        tags: Optional metadata tags.
+        smoke: CLI-only smoke testing configuration.
+
+    Returns:
+        After training completes, you receive a result dict:
+        ```python
+        {
+            "status": "succeeded",
+            "final_reward": 0.85,
+            "model_id": "ft:Qwen/Qwen3-0.6B:job_abc123",
+            "checkpoints": [
+                {"step": 100, "path": "..."},
+                {"step": 200, "path": "..."},
+            ],
+        }
+        ```
+
+    Events:
+        During training, you'll receive streaming events:
+        - `rl.created` - Job created
+        - `rl.running` - Training started
+        - `rl.iteration.complete` - Iteration finished with metrics
+        - `rl.evaluation.complete` - Evaluation finished with scores
+        - `rl.succeeded` / `rl.failed` - Terminal states
+    """
     algorithm: AlgorithmConfig
     services: RLServicesConfig
     compute: ComputeConfig | None = None
@@ -155,29 +387,45 @@
     rollout: RolloutConfig | None = None
     evaluation: EvaluationConfig | None = None
     training: RLTrainingConfig | None = None
-    rubric: dict[str, Any] | None = None  # DEPRECATED: use judge.reward_blend and judge.enabled instead
-    judge: JudgeConfig | None = None
+    rubric: dict[str, Any] | None = None  # DEPRECATED: use verifier.reward_blend and verifier.enabled instead
+    verifier: VerifierConfig | None = None
     tags: dict[str, Any] | None = None
     smoke: SmokeConfig | None = None  # CLI-only: local smoke testing config (ignored by trainer)
 
     def to_dict(self) -> dict[str, Any]:
+        """Convert config to a dictionary."""
         return self.model_dump(mode="python", exclude_none=True)
 
     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
-        """Load RL config from dict/TOML mapping."""
+        """Load RL config from dict/TOML mapping.
+
+        Args:
+            data: Dictionary or TOML mapping with configuration.
+
+        Returns:
+            Validated RLConfig instance.
+        """
         return cls.model_validate(data)
 
     @classmethod
     def from_path(cls, path: Path) -> RLConfig:
+        """Load RL config from a TOML file.
+
+        Args:
+            path: Path to the TOML configuration file.
+
+        Returns:
+            Validated RLConfig instance.
+        """
         content = load_toml(path)
         return cls.from_mapping(content)
 
 
 __all__ = [
     "EvaluationConfig",
-    "JudgeConfig",
-    "JudgeOptionsConfig",
+    "VerifierConfig",
+    "VerifierOptionsConfig",
     "ModelConfig",
     "RLConfig",
     "RLServicesConfig",
synth_ai/sdk/api/train/configs/sft.py
@@ -1,3 +1,40 @@
+"""SFT (Supervised Fine-Tuning) configuration models.
+
+This module defines the configuration schema for SFT training jobs.
+
+Example TOML configuration:
+```toml
+[algorithm]
+type = "offline"
+method = "sft"
+
+[job]
+model = "Qwen/Qwen3-4B"
+data_path = "training_data.jsonl"
+
+[compute]
+gpu_type = "H100"
+gpu_count = 1
+
+[training]
+mode = "lora"
+
+[training.lora]
+r = 16
+alpha = 32
+dropout = 0.1
+
+[hyperparameters]
+n_epochs = 3
+batch_size = 4
+learning_rate = 2e-5
+sequence_length = 2048
+```
+
+See Also:
+- Training reference: /training/sft
+- Quickstart: /quickstart/supervised-fine-tuning
+"""
 from __future__ import annotations
 
 from collections.abc import Mapping
@@ -11,6 +48,14 @@ from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, Poli
 
 
 class JobConfig(ExtraModel):
+    """Core job configuration for SFT.
+
+    Attributes:
+        model: Base model to fine-tune (e.g., "Qwen/Qwen3-4B", "meta-llama/Llama-3-8B").
+        data: Dataset identifier (if using registered datasets).
+        data_path: Path to JSONL training data file.
+        poll_seconds: Polling interval for job status. Default: 10.
+    """
     model: str
     data: str | None = None
     data_path: str | None = None
@@ -18,11 +63,27 @@
 
 
 class SFTDataConfig(ExtraModel):
+    """Data configuration for SFT training.
+
+    Attributes:
+        topology: Data loading topology configuration.
+        validation_path: Path to validation JSONL file for eval during training.
+    """
     topology: dict[str, Any] | None = None
     validation_path: str | None = None
 
 
 class TrainingValidationConfig(ExtraModel):
+    """Validation configuration during training.
+
+    Attributes:
+        enabled: Enable validation during training. Default: False.
+        evaluation_strategy: When to evaluate - "steps" or "epoch".
+        eval_steps: Evaluate every N steps (if strategy is "steps").
+        save_best_model_at_end: Save only the best model checkpoint.
+        metric_for_best_model: Metric to use for best model selection (e.g., "eval_loss").
+        greater_is_better: Whether higher metric is better. Default: False for loss.
+    """
     enabled: bool | None = None
     evaluation_strategy: str | None = None
     eval_steps: int | None = None
@@ -32,6 +93,14 @@ class TrainingValidationConfig(ExtraModel):
 
 
 class TrainingConfig(ExtraModel):
+    """Training mode configuration.
+
+    Attributes:
+        mode: Training mode - "lora", "qlora", or "full".
+        use_qlora: Enable QLoRA (4-bit quantized LoRA). Default: False.
+        validation: Validation configuration.
+        lora: LoRA hyperparameters (r, alpha, dropout, target_modules).
+    """
     mode: str | None = None
     use_qlora: bool | None = None
     validation: TrainingValidationConfig | None = None
@@ -39,6 +108,18 @@
 
 
 class HyperparametersParallelism(ExtraModel):
+    """Parallelism configuration for distributed training.
+
+    Attributes:
+        use_deepspeed: Enable DeepSpeed. Default: False.
+        deepspeed_stage: DeepSpeed ZeRO stage (1, 2, or 3).
+        fsdp: Enable PyTorch FSDP. Default: False.
+        bf16: Use bfloat16 precision. Default: True on supported hardware.
+        fp16: Use float16 precision. Default: False.
+        activation_checkpointing: Enable gradient checkpointing. Default: False.
+        tensor_parallel_size: Tensor parallelism degree.
+        pipeline_parallel_size: Pipeline parallelism degree.
+    """
     use_deepspeed: bool | None = None
     deepspeed_stage: int | None = None
     fsdp: bool | None = None
@@ -50,6 +131,21 @@
 
 
 class HyperparametersConfig(ExtraModel):
+    """Training hyperparameters for SFT.
+
+    Attributes:
+        n_epochs: Number of training epochs. Default: 1.
+        batch_size: Training batch size (alias for global_batch).
+        global_batch: Global batch size across all GPUs.
+        per_device_batch: Per-device batch size.
+        gradient_accumulation_steps: Steps to accumulate gradients. Default: 1.
+        sequence_length: Maximum sequence length. Default: 2048.
+        learning_rate: Optimizer learning rate (e.g., 2e-5).
+        warmup_ratio: Fraction of steps for LR warmup. Default: 0.1.
+        train_kind: Training variant (advanced).
+        weight_decay: Weight decay coefficient. Default: 0.01.
+        parallelism: Distributed training configuration.
+    """
    n_epochs: int = 1
    batch_size: int | None = None
    global_batch: int | None = None
@@ -64,6 +160,58 @@
 
 
 class SFTConfig(ExtraModel):
+    """Root configuration for SFT (Supervised Fine-Tuning) jobs.
+
+    This is the top-level config loaded from a TOML file.
+
+    Example:
+        ```python
+        from synth_ai.sdk.api.train.configs.sft import SFTConfig
+
+        # Load from file
+        config = SFTConfig.from_path("sft_config.toml")
+
+        # Or from dict
+        config = SFTConfig.from_mapping({
+            "job": {"model": "Qwen/Qwen3-4B", "data_path": "data.jsonl"},
+            "hyperparameters": {"n_epochs": 3, "learning_rate": 2e-5},
+        })
+        ```
+
+    Attributes:
+        algorithm: Algorithm configuration (type="offline", method="sft").
+        job: Core job configuration (model, data_path).
+        policy: Policy configuration (preferred over job.model).
+        compute: GPU and compute configuration.
+        data: Data loading configuration.
+        training: Training mode (lora, full) and LoRA config.
+        hyperparameters: Training hyperparameters.
+        lora: Deprecated - use training.lora instead.
+        tags: Optional metadata tags.
+
+    Returns:
+        After training completes, you receive a result dict:
+        ```python
+        {
+            "status": "succeeded",
+            "model_id": "ft:Qwen/Qwen3-4B:sft_abc123",
+            "final_loss": 0.42,
+            "checkpoints": [
+                {"epoch": 1, "loss": 0.65, "path": "..."},
+                {"epoch": 2, "loss": 0.52, "path": "..."},
+                {"epoch": 3, "loss": 0.42, "path": "..."},
+            ],
+        }
+        ```
+
+    Events:
+        During training, you'll receive streaming events:
+        - `sft.created` - Job created
+        - `sft.running` - Training started
+        - `sft.epoch.complete` - Epoch finished with loss
+        - `sft.checkpoint.saved` - Checkpoint saved
+        - `sft.succeeded` / `sft.failed` - Terminal states
+    """
     algorithm: AlgorithmConfig | None = None
     job: JobConfig
     policy: PolicyConfig | None = None  # NEW: unified policy section
@@ -75,15 +223,31 @@
     tags: dict[str, Any] | None = None
 
     def to_dict(self) -> dict[str, Any]:
+        """Convert config to a dictionary."""
         return self.model_dump(mode="python", exclude_none=True)
 
     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> SFTConfig:
-        """Load SFT config from dict/TOML mapping."""
+        """Load SFT config from dict/TOML mapping.
+
+        Args:
+            data: Dictionary or TOML mapping with configuration.
+
+        Returns:
+            Validated SFTConfig instance.
+        """
         return cls.model_validate(data)
 
     @classmethod
     def from_path(cls, path: Path) -> SFTConfig:
+        """Load SFT config from a TOML file.
+
+        Args:
+            path: Path to the TOML configuration file.
+
+        Returns:
+            Validated SFTConfig instance.
+        """
         content = load_toml(path)
         return cls.from_mapping(content)
 
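The sft.py hunks are almost entirely additive documentation, but they also surface the validation knobs (`SFTDataConfig.validation_path`, `TrainingValidationConfig`). A sketch wiring those together through `SFTConfig.from_mapping`, with key names taken from the attribute docs above and values that are purely illustrative:

```python
# Sketch: enabling mid-training validation via the nested config documented above.
from synth_ai.sdk.api.train.configs.sft import SFTConfig

config = SFTConfig.from_mapping({
    "job": {"model": "Qwen/Qwen3-4B", "data_path": "data.jsonl"},
    "data": {"validation_path": "val.jsonl"},
    "training": {
        "mode": "lora",
        "validation": {
            "enabled": True,
            "evaluation_strategy": "steps",
            "eval_steps": 50,
            "metric_for_best_model": "eval_loss",
            "greater_is_better": False,
        },
    },
    "hyperparameters": {"n_epochs": 3, "learning_rate": 2e-5, "sequence_length": 2048},
})
# to_dict() drops None fields, so only the keys set above survive.
print(config.to_dict()["training"]["validation"]["eval_steps"])  # 50
```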
synth_ai/sdk/api/train/graphgen.py
@@ -1,7 +1,7 @@
-"""TOML schema + validation for ADAS/Graphs jobs.
+"""TOML schema + validation for Graph Opt (GraphGen) jobs.
 
-Graphs jobs (aka ADAS jobs) are JSON-dataset-first, but for convenience we also
-support a small TOML wrapper that points at an GraphGenTaskSet JSON file plus a few
+Graph Opt jobs are JSON-dataset-first, but for convenience we also
+support a small TOML wrapper that points at a GraphGenTaskSet JSON file plus a few
 optimization knobs.
 
 Example `graph.toml`:
@@ -16,7 +16,7 @@ auto_start = true # optional
 
 [graph.metadata]
 session_id = "sess_123"
-parent_job_id = "adas_parent"
+parent_job_id = "graph_opt_parent"
 population_size = 4
 num_generations = 5
 ```
@@ -29,7 +29,7 @@ from dataclasses import dataclass
 from pathlib import Path
 from typing import Any, Dict, List, Optional, cast, Literal
 
-from .graphgen_models import GraphGenJobConfig, GraphGenTaskSet, load_graphgen_taskset
+from .graphgen_models import GraphGenJobConfig, GraphGenTaskSet, load_graphgen_taskset
 from .graphgen_validators import GraphGenValidationError, validate_graphgen_job_config
 
 
@@ -112,8 +112,8 @@ def validate_graph_job_section(
         policy_provider=section.get("policy_provider"),
         rollout_budget=int(rollout_budget) if rollout_budget is not None else 100,
         proposer_effort=cast(Literal["low", "medium", "high"], str(proposer_effort)) if proposer_effort is not None else "medium",
-        judge_model=section.get("judge_model"),
-        judge_provider=section.get("judge_provider"),
+        verifier_model=section.get("verifier_model"),
+        verifier_provider=section.get("verifier_provider"),
         population_size=section.get("population_size", 4),
         num_generations=section.get("num_generations"),
     )
@@ -151,17 +151,17 @@ def load_graph_job_toml(path: str | Path) -> GraphTomlResult:
     with open(path, "rb") as f:
         cfg = tomllib.load(f)
 
-    section = cfg.get("graph") or cfg.get("adas") or {}
+    section = cfg.get("graph") or {}
     return validate_graph_job_section(section, base_dir=path.parent)
 
 
 def validate_graph_job_payload(payload: Dict[str, Any]) -> None:
-    """Validate a graph/ADAS job payload (matching backend create request).
+    """Validate a graph job payload (matching backend create request).
 
     Expected keys:
     - dataset: GraphGenTaskSet dict
     - policy_model, rollout_budget, proposer_effort
-    - optional judge_model/judge_provider
+    - optional verifier_model/verifier_provider
     - optional metadata (population_size/num_generations)
     """
     errors: List[Dict[str, Any]] = []
@@ -188,8 +188,8 @@ def validate_graph_job_payload(payload: Dict[str, Any]) -> None:
         policy_provider=payload.get("policy_provider"),
         rollout_budget=int(payload.get("rollout_budget") or 100),
         proposer_effort=cast(Literal["low", "medium", "high"], str(payload.get("proposer_effort") or "medium")),
-        judge_model=payload.get("judge_model"),
-        judge_provider=payload.get("judge_provider"),
+        verifier_model=payload.get("verifier_model"),
+        verifier_provider=payload.get("verifier_provider"),
         population_size=metadata.get("population_size", 4),
         num_generations=metadata.get("num_generations"),
     )
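With the `cfg.get("adas")` fallback removed, graph job TOMLs must use the `[graph]` table, and payloads must use `verifier_model`/`verifier_provider`. A sketch of a payload matching the expected keys listed in the `validate_graph_job_payload` docstring above; the import path is inferred from the file path, the dataset body and model names are placeholders, and the exact failure behavior (raising vs. collecting errors) is not shown in this diff:

```python
# Hypothetical 0.4.4 graph job payload using the renamed verifier keys.
from synth_ai.sdk.api.train.graphgen import validate_graph_job_payload

payload = {
    "dataset": {},  # GraphGenTaskSet dict; real task contents elided here
    "policy_model": "example-policy-model",
    "rollout_budget": 100,
    "proposer_effort": "medium",
    "verifier_model": "example-verifier-model",  # was judge_model in 0.4.1
    "verifier_provider": "example-provider",     # was judge_provider in 0.4.1
    "metadata": {"population_size": 4, "num_generations": 5},
}
validate_graph_job_payload(payload)
```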