synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff shows the changes between publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release.



Files changed (192)
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
synth_ai/api/train/cli.py CHANGED
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import asyncio
 import importlib
 import os
 import time
@@ -17,10 +18,18 @@ try:
 except Exception as exc:  # pragma: no cover - critical dependency
     raise RuntimeError("Unable to load backend configuration helpers") from exc
 
+from synth_ai.streaming import (
+    CLIHandler,
+    JobStreamer,
+    LossCurveHandler,
+    StreamConfig,
+    StreamEndpoints,
+    StreamType,
+)
+
 from .builders import build_rl_payload, build_sft_payload
 from .config_finder import discover_configs, prompt_for_config
 from .env_resolver import KeySpec, resolve_env
-from .pollers import RLJobPoller, SFTJobPoller
 from .task_app import check_task_app_health
 from .utils import (
     REPO_ROOT,
@@ -135,6 +144,62 @@ def _default_backend() -> str:
     return f"{base}/api" if not base.endswith("/api") else base
 
 
+_DEFAULT_SFT_HIDDEN_EVENTS = {
+    "sft.created",
+    "sft.pricing.check.requested",
+    "sft.pricing.check.allowed",
+    "sft.stage",
+    "snapshot.fetch",
+    "hatchet.preflight",
+    "hatchet.submission.attempt",
+    "hatchet.submission.result",
+    "sft.running",
+    "sft.status",
+    "sft.worker.alive",
+    "sft.dispatch.selected",
+    "sft.config.prepared",
+    "sft.strategy.selected",
+    "sft.training.args",
+}
+
+_DEFAULT_RL_HIDDEN_SUBSTRINGS = {"modal", "hatchet"}
+
+
+def _build_stream_components(
+    stream_format: str,
+    *,
+    hidden_event_types: set[str] | None = None,
+    hidden_event_substrings: set[str] | None = None,
+) -> tuple[StreamConfig, list]:
+    """Return stream configuration and handlers for the requested format."""
+    if stream_format == "chart":
+        config = StreamConfig(
+            enabled_streams={StreamType.STATUS, StreamType.EVENTS, StreamType.METRICS},
+            event_types={
+                "sft.progress",
+                "sft.training.started",
+                "sft.training.finish",
+                "sft.validation.summary",
+                "rl.train.step",
+                "rl.train.started",
+                "rl.train.completed",
+                "workflow.completed",
+                "workflow.failed",
+            },
+            metric_names={"train.loss"},
+        )
+        handlers = [LossCurveHandler()]
+    else:
+        config = StreamConfig.default()
+        handlers = [
+            CLIHandler(
+                hidden_event_types=hidden_event_types or set(),
+                hidden_event_substrings=hidden_event_substrings or set(),
+            )
+        ]
+    return config, handlers
+
+
 @click.command("train")
 @click.option(
     "--config",
@@ -183,6 +248,13 @@ def _default_backend() -> str:
     "--poll-timeout", default=3600.0, type=float, help="Maximum seconds to poll before timing out"
 )
 @click.option("--poll-interval", default=5.0, type=float, help="Seconds between poll attempts")
+@click.option(
+    "--stream-format",
+    type=click.Choice(["cli", "chart"]),
+    default="cli",
+    show_default=True,
+    help="Streaming output style (cli = line updates, chart = live loss panel)",
+)
 @click.option(
     "--examples",
     "examples_limit",
@@ -204,6 +276,7 @@ def train_command(
     poll: bool,
     poll_timeout: float,
     poll_interval: float,
+    stream_format: str,
     examples_limit: int | None,
 ) -> None:
     """Interactive launcher for RL / SFT jobs."""
@@ -302,6 +375,7 @@ def train_command(
             poll=poll,
             poll_timeout=poll_timeout,
             poll_interval=poll_interval,
+            stream_format=stream_format,
         )
     else:
         dataset_override_path = Path(dataset_path).expanduser().resolve() if dataset_path else None
@@ -315,13 +389,22 @@ def train_command(
             poll=poll,
             poll_timeout=poll_timeout,
             poll_interval=poll_interval,
+            stream_format=stream_format,
             examples_limit=examples_limit,
         )
 
 
 def _wait_for_training_file(
-    backend_base: str, api_key: str, file_id: str, *, timeout: float = 120.0
+    backend_base: str, api_key: str, file_id: str, *, timeout: float = 10.0
 ) -> None:
+    """Wait for training file to be visible after upload.
+
+    Reduced from 120s to 10s because:
+    - POST response already confirms file is uploaded
+    - Backend now forces read-your-writes consistency
+    - By job creation time, replica lag has resolved
+    - Quick sanity check only, not critical path
+    """
     url = f"{backend_base.rstrip('/')}/files/{file_id}"
     headers = {"Authorization": f"Bearer {api_key}"}
     elapsed = 0.0
@@ -400,6 +483,7 @@ def handle_rl(
     poll: bool,
     poll_timeout: float,
     poll_interval: float,
+    stream_format: str,
 ) -> None:
     overrides: dict[str, Any] = {
         "backend": backend_base,
@@ -497,10 +581,25 @@ def handle_rl(
         click.echo(f"Created job {job_id} (polling disabled)")
         return
 
-    poller = RLJobPoller(backend_base, synth_key, interval=poll_interval, timeout=poll_timeout)
-    outcome = poller.poll_job(job_id)
-    click.echo(f"Final status: {outcome.status}")
-    click.echo(preview_json(outcome.payload, limit=600))
+    click.echo("\n=== Streaming Job Progress ===")
+    config, handlers = _build_stream_components(
+        stream_format, hidden_event_substrings=_DEFAULT_RL_HIDDEN_SUBSTRINGS
+    )
+    if stream_format == "chart":
+        click.echo("Using live loss chart (metric=train.loss)")
+    streamer = JobStreamer(
+        base_url=backend_base,
+        api_key=synth_key,
+        job_id=job_id,
+        endpoints=StreamEndpoints.rl(job_id),
+        config=config,
+        handlers=handlers,
+        interval_seconds=poll_interval,
+        timeout_seconds=poll_timeout,
+    )
+    final_status = asyncio.run(streamer.stream_until_terminal())
+    click.echo(f"Final status: {final_status.get('status', 'unknown')}")
+    click.echo(preview_json(final_status, limit=600))
 
 
 def handle_sft(
@@ -514,6 +613,7 @@ def handle_sft(
     poll: bool,
     poll_timeout: float,
     poll_interval: float,
+    stream_format: str,
     examples_limit: int | None,
 ) -> None:
     dataset_path = dataset_override
@@ -641,10 +741,25 @@ def handle_sft(
            click.echo(f"Started job {job_id} (polling disabled)")
            return
 
-        poller = SFTJobPoller(backend_base, synth_key, interval=poll_interval, timeout=poll_timeout)
-        outcome = poller.poll_job(job_id)
-        click.echo(f"Final status: {outcome.status}")
-        click.echo(preview_json(outcome.payload, limit=600))
+        click.echo("\n=== Streaming Job Progress ===")
+        config, handlers = _build_stream_components(
+            stream_format, hidden_event_types=_DEFAULT_SFT_HIDDEN_EVENTS
+        )
+        if stream_format == "chart":
+            click.echo("Using live loss chart (metric=train.loss)")
+        streamer = JobStreamer(
+            base_url=backend_base,
+            api_key=synth_key,
+            job_id=job_id,
+            endpoints=StreamEndpoints.learning(job_id),
+            config=config,
+            handlers=handlers,
+            interval_seconds=poll_interval,
+            timeout_seconds=poll_timeout,
+        )
+        final_status = asyncio.run(streamer.stream_until_terminal())
+        click.echo(f"Final status: {final_status.get('status', 'unknown')}")
+        click.echo(preview_json(final_status, limit=600))
     finally:
        if limited_path is not None:
            try:
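In place of the removed RLJobPoller/SFTJobPoller flow, job progress is now driven by the synth_ai.streaming package. A minimal sketch of following a job outside the CLI, assuming only the constructor arguments shown in the diff above; the backend URL, API key, and job id are placeholders:

import asyncio

from synth_ai.streaming import CLIHandler, JobStreamer, StreamConfig, StreamEndpoints


async def follow_job(backend_base: str, api_key: str, job_id: str) -> dict:
    # Mirrors the CLI's default "cli" stream format: default config plus the
    # line-oriented handler, polling every 5 seconds for up to an hour.
    streamer = JobStreamer(
        base_url=backend_base,
        api_key=api_key,
        job_id=job_id,
        endpoints=StreamEndpoints.learning(job_id),  # StreamEndpoints.rl(job_id) for RL jobs
        config=StreamConfig.default(),
        handlers=[CLIHandler(hidden_event_types=set(), hidden_event_substrings=set())],
        interval_seconds=5.0,
        timeout_seconds=3600.0,
    )
    return await streamer.stream_until_terminal()


# final_status = asyncio.run(follow_job("https://backend.example/api", "sk-...", "job_123"))
# print(final_status.get("status", "unknown"))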
synth_ai/api/train/configs/__init__.py CHANGED
@@ -5,10 +5,12 @@ from .rl import (
     JudgeConfig,
     JudgeOptionsConfig,
     ModelConfig,
+    RewardsConfig,
     RLConfig,
     RLServicesConfig,
     RLTrainingConfig,
     RolloutConfig,
+    RubricConfig,
     WeightSyncConfig,
 )
 from .sft import (
@@ -20,7 +22,7 @@ from .sft import (
     TrainingConfig,
     TrainingValidationConfig,
 )
-from .shared import AlgorithmConfig, ComputeConfig
+from .shared import AlgorithmConfig, ComputeConfig, LoraConfig, PolicyConfig, TopologyConfig
 
 __all__ = [
     "AlgorithmConfig",
@@ -31,13 +33,18 @@ __all__ = [
     "JobConfig",
     "JudgeConfig",
     "JudgeOptionsConfig",
+    "LoraConfig",
     "ModelConfig",
+    "PolicyConfig",
+    "RewardsConfig",
     "RLConfig",
     "RLServicesConfig",
     "RLTrainingConfig",
     "RolloutConfig",
+    "RubricConfig",
     "SFTConfig",
     "SFTDataConfig",
+    "TopologyConfig",
     "TrainingConfig",
     "TrainingValidationConfig",
     "WeightSyncConfig",
synth_ai/api/train/configs/rl.py CHANGED
@@ -7,7 +7,7 @@ from typing import Any
 from pydantic import model_validator
 
 from ..utils import load_toml
-from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig
 
 
 class RLServicesConfig(ExtraModel):
@@ -48,6 +48,16 @@ class WeightSyncConfig(ExtraModel):
     verify_every_k: int | None = None
 
 
+class RewardsConfig(ExtraModel):
+    """Rewards configuration for RL training."""
+    step_rewards_enabled: bool | None = None
+    step_rewards_mode: str | None = None
+    step_rewards_indicator_lambda: float | None = None
+    step_rewards_beta: float | None = None
+    step_rewards_strategy: str | None = None
+    event_rewards_kind: str | None = None
+
+
 class RLTrainingConfig(ExtraModel):
     num_epochs: int
     iterations_per_epoch: int
@@ -59,13 +69,17 @@ class RLTrainingConfig(ExtraModel):
     learning_rate: float
     log_interval: int | None = None
     weight_sync_interval: int | None = None
+    # DEPRECATED: flat reward fields (use rewards.* instead)
     step_rewards_enabled: bool | None = None
     step_rewards_mode: str | None = None
     step_rewards_indicator_lambda: float | None = None
     step_rewards_beta: float | None = None
     step_rewards_strategy: str | None = None
     event_rewards_kind: str | None = None
+    # NEW: nested configs
     weight_sync: WeightSyncConfig | None = None
+    lora: LoraConfig | None = None
+    rewards: RewardsConfig | None = None
 
 
 class EvaluationConfig(ExtraModel):
@@ -86,9 +100,18 @@ class JudgeOptionsConfig(ExtraModel):
     max_concurrency: int | None = None
 
 
+class RubricConfig(ExtraModel):
+    """Rubric configuration for reward blending."""
+    enabled: bool = False
+    reward_blend: dict[str, float] | None = None  # env, event, outcome weights
+
+
 class JudgeConfig(ExtraModel):
     type: str | None = None
     timeout_s: int | None = None
+    enabled: bool | None = None  # Master switch for judge/rubric
+    reward_blend: dict[str, float] | None = None  # NEW: nested reward blending (replaces rubric.weights)
+    rubric: RubricConfig | None = None  # DEPRECATED: use flat fields instead
     options: JudgeOptionsConfig | None = None
 
 
@@ -96,15 +119,16 @@ class RLConfig(ExtraModel):
     algorithm: AlgorithmConfig
     services: RLServicesConfig
     compute: ComputeConfig | None = None
-    topology: dict[str, Any] | None = None
+    topology: dict[str, Any] | None = None  # DEPRECATED: use compute.topology instead
     vllm: dict[str, Any] | None = None
-    reference: dict[str, Any] | None = None
-    model: ModelConfig
-    lora: dict[str, Any] | None = None
+    reference: dict[str, Any] | None = None  # DEPRECATED: use compute.topology.reference_placement instead
+    model: ModelConfig | None = None  # DEPRECATED: use policy instead
+    policy: PolicyConfig | None = None  # NEW: unified policy (preferred)
+    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
     rollout: RolloutConfig | None = None
     evaluation: EvaluationConfig | None = None
     training: RLTrainingConfig | None = None
-    rubric: dict[str, Any] | None = None
+    rubric: dict[str, Any] | None = None  # DEPRECATED: use judge.reward_blend and judge.enabled instead
     judge: JudgeConfig | None = None
     tags: dict[str, Any] | None = None
 
@@ -113,7 +137,8 @@ class RLConfig(ExtraModel):
 
     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> RLConfig:
-        return cls.model_validate(dict(data))
+        """Load RL config from dict/TOML mapping."""
+        return cls.model_validate(data)
 
     @classmethod
     def from_path(cls, path: Path) -> RLConfig:
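The reward and judge settings that used to live as flat training fields and a [rubric] table now have nested models. A minimal sketch of the new pieces, using only field names from the diff above; the numeric values and blend weights are illustrative placeholders, not documented defaults:

from synth_ai.api.train.configs.rl import JudgeConfig, RewardsConfig

rewards = RewardsConfig(
    step_rewards_enabled=True,
    step_rewards_beta=0.1,  # placeholder value
)
judge = JudgeConfig(
    enabled=True,  # master switch replacing the deprecated [rubric] table
    reward_blend={"env": 0.5, "event": 0.25, "outcome": 0.25},  # placeholder weights
)

print(rewards.model_dump(exclude_none=True))
print(judge.model_dump(exclude_none=True))

In a training TOML these correspond to the [training.rewards] and [judge] tables; the flat training.step_rewards_* fields and [rubric] remain only as deprecated compatibility paths.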
synth_ai/api/train/configs/sft.py CHANGED
@@ -7,7 +7,7 @@ from typing import Any
 from pydantic import Field
 
 from ..utils import load_toml
-from .shared import AlgorithmConfig, ComputeConfig, ExtraModel
+from .shared import AlgorithmConfig, ComputeConfig, ExtraModel, LoraConfig, PolicyConfig
 
 
 class JobConfig(ExtraModel):
@@ -35,6 +35,7 @@ class TrainingConfig(ExtraModel):
     mode: str | None = None
     use_qlora: bool | None = None
     validation: TrainingValidationConfig | None = None
+    lora: LoraConfig | None = None  # NEW: nested LoRA config
 
 
 class HyperparametersParallelism(ExtraModel):
@@ -65,10 +66,12 @@ class HyperparametersConfig(ExtraModel):
 class SFTConfig(ExtraModel):
     algorithm: AlgorithmConfig | None = None
     job: JobConfig
+    policy: PolicyConfig | None = None  # NEW: unified policy section
     compute: ComputeConfig | None = None
     data: SFTDataConfig | None = None
     training: TrainingConfig | None = None
     hyperparameters: HyperparametersConfig = Field(default_factory=HyperparametersConfig)
+    lora: dict[str, Any] | None = None  # DEPRECATED: use training.lora instead
     tags: dict[str, Any] | None = None
 
     def to_dict(self) -> dict[str, Any]:
@@ -76,7 +79,8 @@ class SFTConfig(ExtraModel):
 
     @classmethod
     def from_mapping(cls, data: Mapping[str, Any]) -> SFTConfig:
-        return cls.model_validate(dict(data))
+        """Load SFT config from dict/TOML mapping."""
+        return cls.model_validate(data)
 
     @classmethod
     def from_path(cls, path: Path) -> SFTConfig:
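LoRA settings that previously sat in a top-level [lora] table now nest under [training.lora]. A minimal sketch of the nested form, assuming the TrainingConfig fields stay optional as shown in the diff; the rank, alpha, dropout, and target-module values are illustrative placeholders:

from synth_ai.api.train.configs.shared import LoraConfig
from synth_ai.api.train.configs.sft import TrainingConfig

# Equivalent to a [training.lora] table in an SFT TOML.
training = TrainingConfig(
    use_qlora=False,
    lora=LoraConfig(r=16, alpha=32, dropout=0.05, target_modules=["q_proj", "v_proj"]),
)
print(training.model_dump(exclude_none=True))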
synth_ai/api/train/configs/shared.py CHANGED
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from pydantic import BaseModel, ConfigDict
+from pydantic import BaseModel, ConfigDict, model_validator
 
 
 class ExtraModel(BaseModel):
@@ -15,10 +15,67 @@ class AlgorithmConfig(ExtraModel):
     variety: str
 
 
+class TopologyConfig(ExtraModel):
+    """Compute topology configuration - how GPUs are distributed across processes."""
+    type: str | None = None  # e.g., "single_node_split"
+    gpus_for_vllm: int | None = None
+    gpus_for_training: int | None = None
+    gpus_for_ref: int | None = None
+    tensor_parallel: int | None = None
+    reference_placement: str | None = None  # NEW: e.g., "none", "shared", "dedicated"
+
+
+class LoraConfig(ExtraModel):
+    """LoRA (Low-Rank Adaptation) training configuration."""
+    r: int | None = None  # Rank
+    alpha: int | None = None
+    dropout: float | None = None
+    target_modules: list[str] | None = None
+
+
 class ComputeConfig(ExtraModel):
     gpu_type: str
     gpu_count: int
     nodes: int | None = None
+    topology: TopologyConfig | None = None  # NEW: nested topology
+
+
+class PolicyConfig(ExtraModel):
+    """Unified policy configuration for both SFT and RL.
+
+    This is the SINGLE SOURCE OF TRUTH for:
+    - What model to use (model_name or source)
+    - How to sample from it (temperature, max_tokens, etc.)
+    - How to train it (trainer_mode, label)
+    """
+
+    # Model specification (exactly one required)
+    model_name: str | None = None  # e.g., "Qwen/Qwen3-4B"
+    source: str | None = None  # e.g., "ft:abc123" for checkpoints
+
+    # Sampling parameters (with sensible defaults)
+    max_tokens: int = 512
+    temperature: float = 0.7
+    top_p: float = 0.95
+    top_k: int | None = None
+    repetition_penalty: float = 1.0
+    stop_sequences: list[str] | None = None
+
+    # Training-specific
+    trainer_mode: str  # "lora", "full", "qlora"
+    label: str  # Model identifier/name
+
+    # Optional - for distributed inference
+    inference_url: str | None = None
+
+    @model_validator(mode="after")
+    def _ensure_exactly_one_source(self) -> PolicyConfig:
+        """Ensure exactly one of model_name or source is set."""
+        if not (bool(self.model_name) ^ bool(self.source)):
+            raise ValueError(
+                "Must set exactly one: [policy].model_name OR [policy].source"
+            )
+        return self
 
 
-__all__ = ["ExtraModel", "AlgorithmConfig", "ComputeConfig"]
+__all__ = ["ExtraModel", "AlgorithmConfig", "ComputeConfig", "PolicyConfig", "TopologyConfig", "LoraConfig"]
synth_ai/auth/credentials.py ADDED
@@ -0,0 +1,119 @@
+import contextlib
+import os
+import time
+import webbrowser
+
+import requests
+from requests import RequestException
+from synth_ai.utils.env import resolve_env_var, write_env_var_to_dotenv, write_env_var_to_json
+
+
+def fetch_credentials_from_web_browser_session(
+    browser: bool = True,
+    prod: bool = True
+) -> None:
+    synth_api_key = ''
+    env_api_key = ''
+    org_name = ''
+
+    if browser:
+        origin = "https://www.usesynth.ai" if prod else "http://localhost:3000"
+        init_url = f"{origin}/api/sdk/handshake/init"
+        token_url = f"{origin}/api/sdk/handshake/token"
+
+        print(f"\n🌐 Connecting to {origin} to fetch your Synth credentials")
+
+        # 1. Initialize browser handshake
+        try:
+            init_res = requests.post(init_url, timeout=10)
+        except RequestException as exc:
+            raise RuntimeError(f"Failed to reach handshake init endpoint: {exc}") from exc
+
+        if init_res.status_code != 200:
+            body = init_res.text.strip()
+            raise RuntimeError(f"Handshake init failed ({init_res.status_code}): {body or 'no response body'}")
+
+        try:
+            init_data = init_res.json()
+        except ValueError as exc:
+            raise RuntimeError("Handshake init returned malformed JSON.") from exc
+
+        device_code = str(init_data.get("device_code") or "").strip()
+        verification_uri = str(init_data.get("verification_uri") or "").strip()
+        if not device_code or not verification_uri:
+            raise RuntimeError("Handshake init response missing device_code or verification_uri.")
+
+        try:
+            expires_in = int(init_data.get("expires_in") or 600)
+        except (TypeError, ValueError):
+            expires_in = 120
+        try:
+            interval = max(int(init_data.get("interval") or 3), 1)
+        except (TypeError, ValueError):
+            interval = 3
+
+        # 2. Open browser to verification URL
+        with contextlib.suppress(Exception):
+            webbrowser.open(verification_uri)
+
+        deadline = time.time() + expires_in
+        handshake_data = None
+
+        # 3. Poll handshake token endpoint
+        while time.time() <= deadline:
+            try:
+                handshake_res = requests.post(
+                    token_url,
+                    json={"device_code": device_code},
+                    timeout=10,
+                )
+            except RequestException:
+                time.sleep(interval)
+                continue
+
+            if handshake_res.status_code == 200:
+                try:
+                    handshake_data = handshake_res.json()
+                except ValueError as exc:
+                    raise RuntimeError("Handshake token returned malformed JSON.") from exc
+                break
+
+            if handshake_res.status_code in (404, 410):
+                raise RuntimeError("Handshake failed: device code expired or was revoked.")
+
+            time.sleep(interval)
+
+        if handshake_data is None:
+            raise TimeoutError("Handshake timed out before credentials were returned.")
+
+        # 4. Extract credentials from handshake payload
+        org = handshake_data.get("org")
+        if not isinstance(org, dict):
+            org = {}
+        org_name = str(org.get("name") or "your organization").strip()
+
+        credentials = handshake_data.get("keys")
+        if not isinstance(credentials, dict):
+            credentials = {}
+
+        synth_api_key = str(credentials.get("synth") or "").strip()
+        env_api_key = str(credentials.get("rl_env") or "").strip()
+
+        print(f"\n✅ Connected to {org_name}")
+
+    # Load credentials into the process environment and save them to .env and ~/.synth-ai/config.json
+    if synth_api_key:
+        print("\nLoading SYNTH_API_KEY into process environment")
+        os.environ["SYNTH_API_KEY"] = synth_api_key
+        synth_api_key = resolve_env_var("SYNTH_API_KEY")
+    if env_api_key:
+        print("\nLoading ENVIRONMENT_API_KEY into process environment")
+        os.environ["ENVIRONMENT_API_KEY"] = env_api_key
+        env_api_key = resolve_env_var("ENVIRONMENT_API_KEY")
+
+    if browser:
+        print('')
+        write_env_var_to_json("SYNTH_API_KEY", synth_api_key, "~/.synth-ai/config.json")
+        write_env_var_to_dotenv("SYNTH_API_KEY", synth_api_key)
+        write_env_var_to_json("ENVIRONMENT_API_KEY", env_api_key, "~/.synth-ai/config.json")
+        write_env_var_to_dotenv("ENVIRONMENT_API_KEY", env_api_key)
synth_ai/cli/__init__.py CHANGED
@@ -52,9 +52,17 @@ if not _cli_module:
     raise ImportError("synth_ai.cli.root is required for CLI entrypoint")
 cli = _cli_module.cli  # type: ignore[attr-defined]
 
+# Register core commands implemented as standalone modules
+try:
+    from synth_ai.cli.setup import setup_cmd
+
+    cli.add_command(setup_cmd, name="setup")
+except Exception:
+    pass
+
 
 # Register optional subcommands packaged under synth_ai.cli.*
-for _module_path in ("synth_ai.cli.demo", "synth_ai.cli.turso"):
+for _module_path in ("synth_ai.cli.commands.demo", "synth_ai.cli.commands.status", "synth_ai.cli.turso"):
     module = _maybe_import(_module_path)
     if not module:
         continue
@@ -64,6 +72,9 @@ for _module_path in ("synth_ai.cli.demo", "synth_ai.cli.turso"):
     if fn:
         fn(cli)
 
+# Register help command
+_maybe_call("synth_ai.cli.commands.help.core", "register", cli)
+
 # Train CLI lives under synth_ai.api.train
 _maybe_call("synth_ai.api.train", "register", cli)
 
@@ -84,7 +95,4 @@ if _task_apps_module:
     if register_task_apps:
         register_task_apps(cli)
 
-    # Register TUI command if dependencies allow
-    _maybe_call("synth_ai.cli.tui", "register", cli)
-
 # Top-level 'info' alias removed; use `synth-ai task-app info` instead
synth_ai/cli/commands/__init__.py ADDED
@@ -0,0 +1,17 @@
+"""
+Structured CLI command implementations.
+
+Each subpackage under this namespace provides the core command entrypoints,
+validation helpers, and error types for a top-level CLI command (e.g. train,
+eval, deploy).
+"""
+
+from __future__ import annotations
+
+__all__ = [
+    "train",
+    "eval",
+    "filter",
+    "deploy",
+    "status",
+]
synth_ai/cli/commands/demo/__init__.py ADDED
@@ -0,0 +1,6 @@
+from __future__ import annotations
+
+from .core import command, register
+
+__all__ = ["command", "register"]
+