synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/analyze_semantic_words.sh +2 -2
- examples/blog_posts/pokemon_vl/README.md +98 -0
- examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
- examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
- examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
- examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
- examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
- examples/blog_posts/warming_up_to_rl/README.md +158 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
- examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
- examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
- examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
- examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
- examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
- examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
- examples/multi_step/configs/verilog_rl_lora.toml +80 -123
- examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
- examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
- examples/qwen_coder/configs/coder_lora_small.toml +1 -3
- examples/qwen_vl/README.md +10 -12
- examples/qwen_vl/SETUP_COMPLETE.md +7 -8
- examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
- examples/qwen_vl/collect_data_via_cli.md +76 -84
- examples/qwen_vl/collect_vision_traces.py +4 -4
- examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
- examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
- examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
- examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
- examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
- examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
- examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
- examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
- examples/qwen_vl/run_vision_comparison.sh +6 -7
- examples/rl/README.md +5 -5
- examples/rl/configs/rl_from_base_qwen.toml +26 -1
- examples/rl/configs/rl_from_base_qwen17.toml +5 -2
- examples/rl/task_app/README.md +1 -2
- examples/rl/task_app/math_single_step.py +2 -2
- examples/run_crafter_demo.sh +2 -2
- examples/sft/README.md +1 -1
- examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
- examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
- examples/swe/task_app/README.md +32 -2
- examples/swe/task_app/grpo_swe_mini.py +4 -0
- examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
- examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
- examples/swe/task_app/hosted/inference/openai_client.py +4 -4
- examples/swe/task_app/morph_backend.py +178 -0
- examples/task_apps/crafter/task_app/README.md +1 -1
- examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
- examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
- examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
- examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
- examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
- examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
- examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
- examples/task_apps/math/README.md +1 -2
- examples/task_apps/pokemon_red/README.md +3 -4
- examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
- examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
- examples/task_apps/pokemon_red/task_app.py +36 -5
- examples/task_apps/sokoban/README.md +2 -3
- examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
- examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
- examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
- examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
- examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
- examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
- examples/warming_up_to_rl/task_app/README.md +1 -1
- examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
- examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
- examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
- synth_ai/api/train/builders.py +9 -3
- synth_ai/api/train/cli.py +125 -10
- synth_ai/api/train/configs/__init__.py +8 -1
- synth_ai/api/train/configs/rl.py +32 -7
- synth_ai/api/train/configs/sft.py +6 -2
- synth_ai/api/train/configs/shared.py +59 -2
- synth_ai/auth/credentials.py +119 -0
- synth_ai/cli/__init__.py +12 -4
- synth_ai/cli/commands/__init__.py +17 -0
- synth_ai/cli/commands/demo/__init__.py +6 -0
- synth_ai/cli/commands/demo/core.py +163 -0
- synth_ai/cli/commands/deploy/__init__.py +23 -0
- synth_ai/cli/commands/deploy/core.py +614 -0
- synth_ai/cli/commands/deploy/errors.py +72 -0
- synth_ai/cli/commands/deploy/validation.py +11 -0
- synth_ai/cli/commands/eval/__init__.py +19 -0
- synth_ai/cli/commands/eval/core.py +1109 -0
- synth_ai/cli/commands/eval/errors.py +81 -0
- synth_ai/cli/commands/eval/validation.py +133 -0
- synth_ai/cli/commands/filter/__init__.py +12 -0
- synth_ai/cli/commands/filter/core.py +388 -0
- synth_ai/cli/commands/filter/errors.py +55 -0
- synth_ai/cli/commands/filter/validation.py +77 -0
- synth_ai/cli/commands/help/__init__.py +177 -0
- synth_ai/cli/commands/help/core.py +73 -0
- synth_ai/cli/commands/status/__init__.py +64 -0
- synth_ai/cli/commands/status/client.py +192 -0
- synth_ai/cli/commands/status/config.py +92 -0
- synth_ai/cli/commands/status/errors.py +20 -0
- synth_ai/cli/commands/status/formatters.py +164 -0
- synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
- synth_ai/cli/commands/status/subcommands/files.py +79 -0
- synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
- synth_ai/cli/commands/status/subcommands/models.py +79 -0
- synth_ai/cli/commands/status/subcommands/runs.py +81 -0
- synth_ai/cli/commands/status/subcommands/summary.py +47 -0
- synth_ai/cli/commands/status/utils.py +114 -0
- synth_ai/cli/commands/train/__init__.py +53 -0
- synth_ai/cli/commands/train/core.py +21 -0
- synth_ai/cli/commands/train/errors.py +117 -0
- synth_ai/cli/commands/train/judge_schemas.py +199 -0
- synth_ai/cli/commands/train/judge_validation.py +304 -0
- synth_ai/cli/commands/train/validation.py +443 -0
- synth_ai/cli/demo.py +2 -162
- synth_ai/cli/deploy/__init__.py +28 -0
- synth_ai/cli/deploy/core.py +5 -0
- synth_ai/cli/deploy/errors.py +23 -0
- synth_ai/cli/deploy/validation.py +5 -0
- synth_ai/cli/eval/__init__.py +36 -0
- synth_ai/cli/eval/core.py +5 -0
- synth_ai/cli/eval/errors.py +31 -0
- synth_ai/cli/eval/validation.py +5 -0
- synth_ai/cli/filter/__init__.py +28 -0
- synth_ai/cli/filter/core.py +5 -0
- synth_ai/cli/filter/errors.py +23 -0
- synth_ai/cli/filter/validation.py +5 -0
- synth_ai/cli/modal_serve/__init__.py +12 -0
- synth_ai/cli/modal_serve/core.py +14 -0
- synth_ai/cli/modal_serve/errors.py +8 -0
- synth_ai/cli/modal_serve/validation.py +11 -0
- synth_ai/cli/serve/__init__.py +12 -0
- synth_ai/cli/serve/core.py +14 -0
- synth_ai/cli/serve/errors.py +8 -0
- synth_ai/cli/serve/validation.py +11 -0
- synth_ai/cli/setup.py +20 -265
- synth_ai/cli/status.py +7 -126
- synth_ai/cli/task_app_deploy.py +1 -10
- synth_ai/cli/task_app_modal_serve.py +4 -9
- synth_ai/cli/task_app_serve.py +4 -11
- synth_ai/cli/task_apps.py +58 -1487
- synth_ai/cli/train/__init__.py +12 -0
- synth_ai/cli/train/core.py +21 -0
- synth_ai/cli/train/errors.py +8 -0
- synth_ai/cli/train/validation.py +24 -0
- synth_ai/cli/train.py +1 -14
- synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
- synth_ai/environments/examples/red/engine.py +33 -12
- synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
- synth_ai/environments/examples/red/environment.py +26 -0
- synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
- synth_ai/http.py +12 -0
- synth_ai/judge_schemas.py +10 -11
- synth_ai/learning/rl/client.py +3 -1
- synth_ai/streaming/__init__.py +29 -0
- synth_ai/streaming/config.py +94 -0
- synth_ai/streaming/handlers.py +469 -0
- synth_ai/streaming/streamer.py +301 -0
- synth_ai/streaming/types.py +95 -0
- synth_ai/task/validators.py +2 -2
- synth_ai/tracing_v3/migration_helper.py +1 -2
- synth_ai/utils/env.py +25 -18
- synth_ai/utils/http.py +4 -1
- synth_ai/utils/modal.py +2 -2
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
- examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
- synth_ai/cli/tui.py +0 -62
- synth_ai/tui/__init__.py +0 -5
- synth_ai/tui/__main__.py +0 -13
- synth_ai/tui/cli/__init__.py +0 -1
- synth_ai/tui/cli/query_experiments.py +0 -164
- synth_ai/tui/cli/query_experiments_v3.py +0 -164
- synth_ai/tui/dashboard.py +0 -911
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
"""`synth runs` command group."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from ..client import StatusAPIClient
|
|
10
|
+
from ..errors import StatusAPIError
|
|
11
|
+
from ..formatters import console, events_panel, print_json, runs_table
|
|
12
|
+
from ..utils import bail, common_options, parse_relative_time, resolve_context_config
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.group("runs", help="Inspect individual job runs/attempts.")
@click.pass_context
def runs_group(ctx: click.Context) -> None:  # pragma: no cover - Click wiring
    # Guarantee that ctx.obj is a dict before any subcommand executes; the
    # subcommands hand this context to resolve_context_config().
    ctx.ensure_object(dict)
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@runs_group.command("list")
@common_options()
@click.argument("job_id")
@click.option("--json", "output_json", is_flag=True)
@click.pass_context
def list_runs(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    job_id: str,
    output_json: bool,
) -> None:
    # NOTE: intentionally no docstring here -- Click would surface it as the
    # command's --help text and thereby change user-visible output.
    #
    # Fetches the runs recorded for `job_id` and prints them either as JSON
    # (--json) or as a table.
    backend = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)

    async def _fetch_and_render() -> None:
        try:
            async with StatusAPIClient(backend) as api:
                job_runs = await api.list_job_runs(job_id)
                if not output_json:
                    console.print(runs_table(job_runs))
                else:
                    print_json(job_runs)
        except StatusAPIError as err:
            # Convert backend failures into a Click-level error via bail().
            bail(f"Backend error: {err}")

    asyncio.run(_fetch_and_render())
|
|
48
|
+
|
|
49
|
+
|
|
50
|
+
@runs_group.command("logs")
@common_options()
@click.argument("job_id")
@click.option("--run", "run_id", required=True, help="Run identifier (number or ID) to inspect.")
@click.option("--since", help="Filter events after the supplied timestamp/relative offset.")
@click.option("--json", "output_json", is_flag=True)
@click.pass_context
def run_logs(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    job_id: str,
    run_id: str,
    since: str | None,
    output_json: bool,
) -> None:
    # NOTE: intentionally no docstring here -- Click would surface it as the
    # command's --help text and thereby change user-visible output.
    #
    # Fetches the events of one run of `job_id` and prints them either as
    # JSON (--json) or as a panel.
    backend = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)
    # Resolve the --since value once, before entering the event loop.
    cutoff = parse_relative_time(since)

    async def _fetch_and_render() -> None:
        try:
            async with StatusAPIClient(backend) as api:
                run_events = await api.get_job_events(job_id, since=cutoff, run_id=run_id)
                if not output_json:
                    console.print(events_panel(run_events))
                else:
                    print_json(run_events)
        except StatusAPIError as err:
            # Convert backend failures into a Click-level error via bail().
            bail(f"Backend error: {err}")

    asyncio.run(_fetch_and_render())
|
|
@@ -0,0 +1,47 @@
|
|
|
1
|
+
"""`synth status summary` command."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
|
|
7
|
+
import click
|
|
8
|
+
|
|
9
|
+
from ..client import StatusAPIClient
|
|
10
|
+
from ..errors import StatusAPIError
|
|
11
|
+
from ..formatters import console, files_table, jobs_table, models_table
|
|
12
|
+
from ..utils import common_options, resolve_context_config
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@click.command("summary", help="Show a condensed overview of recent jobs, models, and files.")
@common_options()
@click.option("--limit", default=5, show_default=True, type=int, help="Rows per section.")
@click.pass_context
def summary_command(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    limit: int,
) -> None:
    # Prints three tables (jobs, models, files), each capped at `limit` rows.
    # Every section is fetched independently so that one failing endpoint does
    # not prevent the remaining sections from being shown.
    cfg = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)

    async def _fetch_or_empty(label: str, fetch) -> list[dict[str, object]]:
        # Best-effort fetch of one section. A backend failure still degrades to
        # an empty table, but it is now reported instead of being silently
        # swallowed (previously an auth/network error looked like "no data").
        try:
            return await fetch(limit=limit)
        except StatusAPIError as exc:
            # NOTE(review): assumes `console` (from ..formatters) is a rich
            # Console, as its use for table rendering suggests -- confirm.
            # markup=False keeps brackets in the error text from being parsed
            # as rich markup.
            console.print(
                f"Warning: could not fetch {label}: {exc}",
                style="yellow",
                markup=False,
            )
            return []

    async def _run() -> tuple[list[dict[str, object]], list[dict[str, object]], list[dict[str, object]]]:
        async with StatusAPIClient(cfg) as client:
            # Sequential on purpose: same request order as before.
            jobs = await _fetch_or_empty("jobs", client.list_jobs)
            models = await _fetch_or_empty("models", client.list_models)
            files = await _fetch_or_empty("files", client.list_files)
            return jobs, models, files

    jobs, models, files = asyncio.run(_run())
    console.print(jobs_table(jobs[:limit]))
    console.print(models_table(models[:limit]))
    console.print(files_table(files[:limit]))
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
"""Shared utilities for status commands."""
|
|
2
|
+
|
|
3
|
+
from __future__ import annotations
|
|
4
|
+
|
|
5
|
+
import asyncio
|
|
6
|
+
from collections.abc import Callable, Coroutine
|
|
7
|
+
from datetime import UTC, datetime, timedelta
|
|
8
|
+
from typing import Any, TypeVar
|
|
9
|
+
|
|
10
|
+
import click
|
|
11
|
+
from rich.console import Console
|
|
12
|
+
|
|
13
|
+
from .config import DEFAULT_TIMEOUT, BackendConfig, resolve_backend_config
|
|
14
|
+
|
|
15
|
+
T = TypeVar("T")
|
|
16
|
+
|
|
17
|
+
console = Console()
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
def parse_relative_time(value: str | None) -> str | None:
|
|
21
|
+
"""Convert relative time expressions (e.g., '5m', '2h', '1d') to ISO strings."""
|
|
22
|
+
if not value:
|
|
23
|
+
return None
|
|
24
|
+
token = value.strip().lower()
|
|
25
|
+
if not token:
|
|
26
|
+
return None
|
|
27
|
+
multiplier = 1.0
|
|
28
|
+
if token.endswith("ms"):
|
|
29
|
+
multiplier = 0.001
|
|
30
|
+
token = token[:-2]
|
|
31
|
+
elif token.endswith("s"):
|
|
32
|
+
multiplier = 1.0
|
|
33
|
+
token = token[:-1]
|
|
34
|
+
elif token.endswith("m"):
|
|
35
|
+
multiplier = 60.0
|
|
36
|
+
token = token[:-1]
|
|
37
|
+
elif token.endswith("h"):
|
|
38
|
+
multiplier = 3600.0
|
|
39
|
+
token = token[:-1]
|
|
40
|
+
elif token.endswith("d"):
|
|
41
|
+
multiplier = 86400.0
|
|
42
|
+
token = token[:-1]
|
|
43
|
+
|
|
44
|
+
try:
|
|
45
|
+
seconds = float(token) * multiplier
|
|
46
|
+
except ValueError:
|
|
47
|
+
return value
|
|
48
|
+
|
|
49
|
+
dt = datetime.now(UTC) - timedelta(seconds=seconds)
|
|
50
|
+
return dt.isoformat()
|
|
51
|
+
|
|
52
|
+
|
|
53
|
+
def ensure_async(fn: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
    """Decorator to run an async callable via asyncio.run inside Click commands.

    Args:
        fn: Coroutine function to adapt.

    Returns:
        A synchronous callable that forwards its arguments to ``fn``, drives
        the resulting coroutine with ``asyncio.run`` and returns its result.
        The wrapper keeps ``fn``'s ``__name__``/``__doc__`` so that tools which
        derive a command name or help text from the function (as Click does)
        see the original metadata.
    """
    # Function-scope import so the module's import block stays untouched.
    from functools import wraps

    @wraps(fn)
    def wrapper(*args: Any, **kwargs: Any) -> T:
        return asyncio.run(fn(*args, **kwargs))

    return wrapper
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def resolve_context_config(
    ctx: click.Context,
    *,
    base_url: str | None,
    api_key: str | None,
    timeout: float | None,
) -> BackendConfig:
    """Pick the backend configuration a status command should use.

    When the caller supplied no override (no base URL, no API key, and a
    timeout that is either ``None`` or ``DEFAULT_TIMEOUT``), a ``BackendConfig``
    stored under ``"status_backend_config"`` in the Click context object is
    reused if present. In every other case a configuration is resolved from
    the given arguments via ``resolve_backend_config``.
    """
    no_overrides = (
        base_url is None
        and api_key is None
        and timeout in (None, DEFAULT_TIMEOUT)
    )
    if no_overrides:
        shared = ctx.find_object(dict)
        if shared:
            cached = shared.get("status_backend_config")
            if isinstance(cached, BackendConfig):
                return cached
    return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
def warn(message: str) -> None:
    """Print ``message`` on the module-level console wrapped in yellow markup."""
    styled = f"[yellow]{message}[/yellow]"
    console.print(styled)
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def bail(message: str) -> None:
    """Abort the current command by raising ``click.ClickException(message)``."""
    failure = click.ClickException(message)
    raise failure
|
|
83
|
+
|
|
84
|
+
|
|
85
|
+
def common_options() -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a decorator that adds the shared backend options to a command.

    The decorated callable receives ``base_url``, ``api_key`` and ``timeout``
    keyword arguments from ``--base-url``, ``--api-key`` and ``--timeout``.
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        base_url_option = click.option(
            "--base-url",
            envvar="SYNTH_STATUS_BASE_URL",
            default=None,
            help="Override the Synth backend base URL for this command.",
        )
        api_key_option = click.option(
            "--api-key",
            envvar="SYNTH_STATUS_API_KEY",
            default=None,
            help="API key for the Synth backend.",
        )
        timeout_option = click.option(
            "--timeout",
            default=DEFAULT_TIMEOUT,
            show_default=True,
            type=float,
            help="HTTP request timeout in seconds.",
        )
        # Apply innermost-first (timeout, api-key, base-url), i.e. the same
        # order as stacking the three decorators top-down in source.
        decorated = timeout_option(func)
        decorated = api_key_option(decorated)
        decorated = base_url_option(decorated)
        return decorated

    return decorator
|
|
@@ -0,0 +1,53 @@
|
|
|
1
|
+
from .core import register, train_command
|
|
2
|
+
from .errors import (
|
|
3
|
+
InvalidJudgeConfigError,
|
|
4
|
+
InvalidRubricConfigError,
|
|
5
|
+
TrainCliError,
|
|
6
|
+
)
|
|
7
|
+
from .judge_schemas import (
|
|
8
|
+
JudgeConfig,
|
|
9
|
+
JudgeOptionsConfig,
|
|
10
|
+
JudgeRequestPayload,
|
|
11
|
+
RubricConfig,
|
|
12
|
+
RubricWeightsConfig,
|
|
13
|
+
build_judge_http_options,
|
|
14
|
+
)
|
|
15
|
+
from .judge_validation import (
|
|
16
|
+
check_for_deprecated_fields,
|
|
17
|
+
extract_and_validate_judge_rubric,
|
|
18
|
+
validate_judge_config,
|
|
19
|
+
validate_rubric_config,
|
|
20
|
+
)
|
|
21
|
+
from .validation import (
|
|
22
|
+
load_and_validate_rl,
|
|
23
|
+
load_and_validate_sft,
|
|
24
|
+
validate_rl_config,
|
|
25
|
+
validate_sft_config,
|
|
26
|
+
)
|
|
27
|
+
|
|
28
|
+
__all__ = [
|
|
29
|
+
# Core
|
|
30
|
+
"register",
|
|
31
|
+
"train_command",
|
|
32
|
+
# Errors
|
|
33
|
+
"TrainCliError",
|
|
34
|
+
"InvalidJudgeConfigError",
|
|
35
|
+
"InvalidRubricConfigError",
|
|
36
|
+
# SFT/RL validation
|
|
37
|
+
"validate_sft_config",
|
|
38
|
+
"validate_rl_config",
|
|
39
|
+
"load_and_validate_sft",
|
|
40
|
+
"load_and_validate_rl",
|
|
41
|
+
# Judge/Rubric schemas
|
|
42
|
+
"RubricWeightsConfig",
|
|
43
|
+
"RubricConfig",
|
|
44
|
+
"JudgeOptionsConfig",
|
|
45
|
+
"JudgeConfig",
|
|
46
|
+
"JudgeRequestPayload",
|
|
47
|
+
"build_judge_http_options",
|
|
48
|
+
# Judge/Rubric validation
|
|
49
|
+
"validate_rubric_config",
|
|
50
|
+
"validate_judge_config",
|
|
51
|
+
"extract_and_validate_judge_rubric",
|
|
52
|
+
"check_for_deprecated_fields",
|
|
53
|
+
]
|
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import click
|
|
4
|
+
from synth_ai.api.train.cli import (
|
|
5
|
+
register as _register_with_cli,
|
|
6
|
+
)
|
|
7
|
+
from synth_ai.api.train.cli import (
|
|
8
|
+
train_command as _train_command,
|
|
9
|
+
)
|
|
10
|
+
|
|
11
|
+
__all__ = ["register", "train_command"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def register(cli: click.Group) -> None:
    """Register the train command on ``cli``.

    Delegates to ``register`` from ``synth_ai.api.train.cli``.
    """
    _register_with_cli(cli)
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def train_command(*args, **kwargs):
    """Forward all arguments to ``train_command`` from ``synth_ai.api.train.cli``."""
    outcome = _train_command(*args, **kwargs)
    return outcome
|
|
@@ -0,0 +1,117 @@
|
|
|
1
|
+
from dataclasses import dataclass
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class TrainCliError(RuntimeError):
|
|
5
|
+
"""Base exception for train CLI failures."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
@dataclass(slots=True)
|
|
9
|
+
class TomlParseError(TrainCliError):
|
|
10
|
+
"""Raised when TOML file cannot be parsed."""
|
|
11
|
+
path: str
|
|
12
|
+
detail: str
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
@dataclass(slots=True)
|
|
16
|
+
class ConfigNotFoundError(TrainCliError):
|
|
17
|
+
"""Raised when config file is not found."""
|
|
18
|
+
path: str
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
@dataclass(slots=True)
|
|
22
|
+
class InvalidSFTConfigError(TrainCliError):
|
|
23
|
+
"""Raised when SFT configuration is invalid."""
|
|
24
|
+
detail: str
|
|
25
|
+
hint: str | None = None
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
@dataclass(slots=True)
|
|
29
|
+
class InvalidRLConfigError(TrainCliError):
|
|
30
|
+
"""Raised when RL configuration is invalid."""
|
|
31
|
+
detail: str
|
|
32
|
+
hint: str | None = None
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
@dataclass(slots=True)
|
|
36
|
+
class MissingAlgorithmError(TrainCliError):
|
|
37
|
+
"""Raised when [algorithm] section is missing or invalid."""
|
|
38
|
+
detail: str
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@dataclass(slots=True)
|
|
42
|
+
class MissingModelError(TrainCliError):
|
|
43
|
+
"""Raised when model specification is missing."""
|
|
44
|
+
detail: str
|
|
45
|
+
hint: str | None = None
|
|
46
|
+
|
|
47
|
+
|
|
48
|
+
@dataclass(slots=True)
|
|
49
|
+
class MissingDatasetError(TrainCliError):
|
|
50
|
+
"""Raised when dataset path is missing for SFT."""
|
|
51
|
+
detail: str
|
|
52
|
+
hint: str | None = None
|
|
53
|
+
|
|
54
|
+
|
|
55
|
+
@dataclass(slots=True)
|
|
56
|
+
class MissingComputeError(TrainCliError):
|
|
57
|
+
"""Raised when compute configuration is missing or incomplete."""
|
|
58
|
+
detail: str
|
|
59
|
+
hint: str | None = None
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
@dataclass(slots=True)
|
|
63
|
+
class UnsupportedAlgorithmError(TrainCliError):
|
|
64
|
+
"""Raised when algorithm type is not supported."""
|
|
65
|
+
algorithm_type: str
|
|
66
|
+
expected: str
|
|
67
|
+
hint: str | None = None
|
|
68
|
+
|
|
69
|
+
|
|
70
|
+
@dataclass(slots=True)
|
|
71
|
+
class InvalidHyperparametersError(TrainCliError):
|
|
72
|
+
"""Raised when hyperparameters are invalid."""
|
|
73
|
+
detail: str
|
|
74
|
+
parameter: str | None = None
|
|
75
|
+
|
|
76
|
+
|
|
77
|
+
@dataclass(slots=True)
|
|
78
|
+
class InvalidTopologyError(TrainCliError):
|
|
79
|
+
"""Raised when topology configuration is invalid."""
|
|
80
|
+
detail: str
|
|
81
|
+
hint: str | None = None
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
@dataclass(slots=True)
|
|
85
|
+
class InvalidJudgeConfigError(TrainCliError):
|
|
86
|
+
"""Raised when judge configuration validation fails."""
|
|
87
|
+
detail: str
|
|
88
|
+
|
|
89
|
+
def __str__(self) -> str:
|
|
90
|
+
return self.detail
|
|
91
|
+
|
|
92
|
+
|
|
93
|
+
@dataclass(slots=True)
|
|
94
|
+
class InvalidRubricConfigError(TrainCliError):
|
|
95
|
+
"""Raised when rubric configuration validation fails."""
|
|
96
|
+
detail: str
|
|
97
|
+
|
|
98
|
+
def __str__(self) -> str:
|
|
99
|
+
return self.detail
|
|
100
|
+
|
|
101
|
+
|
|
102
|
+
__all__ = [
|
|
103
|
+
"TrainCliError",
|
|
104
|
+
"TomlParseError",
|
|
105
|
+
"ConfigNotFoundError",
|
|
106
|
+
"InvalidSFTConfigError",
|
|
107
|
+
"InvalidRLConfigError",
|
|
108
|
+
"MissingAlgorithmError",
|
|
109
|
+
"MissingModelError",
|
|
110
|
+
"MissingDatasetError",
|
|
111
|
+
"MissingComputeError",
|
|
112
|
+
"UnsupportedAlgorithmError",
|
|
113
|
+
"InvalidHyperparametersError",
|
|
114
|
+
"InvalidTopologyError",
|
|
115
|
+
"InvalidJudgeConfigError",
|
|
116
|
+
"InvalidRubricConfigError",
|
|
117
|
+
]
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Pydantic schemas for judge/rubric configuration.
|
|
3
|
+
|
|
4
|
+
These models define the ACTUAL fields used by the backend judge service,
|
|
5
|
+
with all dead code removed. This is the single source of truth for what
|
|
6
|
+
gets sent in HTTP requests.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
from __future__ import annotations
|
|
10
|
+
|
|
11
|
+
from typing import Any, Optional

from pydantic import ConfigDict, Field, model_validator
from synth_ai.api.train.configs.shared import ExtraModel
|
|
15
|
+
|
|
16
|
+
# Public API of this module. build_judge_http_options is included because it
# is a public helper defined below.
__all__ = [
    "RubricWeightsConfig",
    "RubricConfig",
    "JudgeOptionsConfig",
    "JudgeConfig",
    "JudgeRequestPayload",
    "build_judge_http_options",
]
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class RubricWeightsConfig(ExtraModel):
    """
    Reward blending weights (client-side only, not sent to backend).

    These weights control how env rewards, event judge scores, and outcome
    judge scores are combined into a final reward signal for policy gradients.

    Formula:
        total_reward = (env * env_return) + (event * sum(event_scores)) + (outcome * outcome_score)
    """

    # Every weight is constrained to be >= 0 by its Field definition.
    env: float = Field(
        default=1.0,
        ge=0.0,
        description="Weight for environment rewards (task app native rewards)",
    )
    event: float = Field(
        default=0.0,
        ge=0.0,
        description="Weight for per-event judge scores (step-level judging)",
    )
    outcome: float = Field(
        default=0.0,
        ge=0.0,
        description="Weight for outcome judge score (episode-level judging)",
    )

    @model_validator(mode="after")
    def _validate_weights_sum(self) -> RubricWeightsConfig:
        """Reject a configuration in which every weight is zero."""
        weights = (self.env, self.event, self.outcome)
        if all(weight == 0.0 for weight in weights):
            raise ValueError("At least one reward weight must be non-zero")
        return self
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class RubricConfig(ExtraModel):
    """
    Top-level rubric configuration.

    Controls whether rubric-based judging is enabled and how rewards are blended.
    """

    # Off by default.
    enabled: bool = Field(default=False, description="Master switch for rubric-based judging")
    # A fresh RubricWeightsConfig (with its own defaults) is built per instance.
    weights: RubricWeightsConfig = Field(
        description="Reward blending weights (env/event/outcome)",
        default_factory=RubricWeightsConfig,
    )
|
|
73
|
+
|
|
74
|
+
|
|
75
|
+
class JudgeOptionsConfig(ExtraModel):
    """
    Judge provider options (sent to backend in HTTP request).

    These fields are sent in the "options" object of the judge score request.
    All fields here map directly to the backend JudgeOptions schema.
    """

    # --- required fields -------------------------------------------------
    provider: str = Field(
        ...,
        pattern=r"^(openai|groq|gemini)$",
        description="Judge provider type ('openai', 'groq', 'gemini')",
    )
    model: str = Field(
        ...,
        min_length=1,
        description="Model identifier (e.g., 'openai/gpt-oss-120b', 'gpt-5')",
    )

    # --- optional fields -------------------------------------------------
    rubric_id: Optional[str] = Field(
        default=None,
        description="Base rubric identifier (e.g., 'crafter/bundle@v1')",
    )
    event: bool = Field(default=True, description="Enable per-event (step-level) judging")
    outcome: bool = Field(default=True, description="Enable outcome (episode-level) judging")
    timeout_s: Optional[float] = Field(
        default=None,
        gt=0,
        description="Request timeout in seconds",
    )
    metadata: dict[str, Any] = Field(
        default_factory=dict,
        description="Optional metadata (e.g., {'async': true, 'custom_field': 'value'})",
    )
    rubric_overrides: dict[str, Any] = Field(
        default_factory=dict,
        description=(
            "Static rubric criteria overrides (rarely used - TaskInfo overrides take priority). "
            "Format: {'event': {'criteria': [...]}, 'outcome': {'criteria': [...]}}"
        ),
    )

    @model_validator(mode="after")
    def _validate_at_least_one_enabled(self) -> JudgeOptionsConfig:
        """Reject a configuration with both event and outcome judging disabled."""
        if not (self.event or self.outcome):
            raise ValueError("At least one of 'event' or 'outcome' must be enabled")
        return self
|
|
127
|
+
|
|
128
|
+
|
|
129
|
+
class JudgeConfig(ExtraModel):
    """
    Top-level judge configuration.

    This is parsed from TOML [judge] section and contains all judge-related settings.
    """

    # Required: there is no default, so a judge config without options is invalid.
    options: JudgeOptionsConfig = Field(..., description="Judge provider options (sent to backend)")
|
|
139
|
+
|
|
140
|
+
|
|
141
|
+
# HTTP Request Payload Structures (for documentation/type safety)
|
|
142
|
+
|
|
143
|
+
class JudgeRequestPayload(ExtraModel):
    """
    HTTP request payload structure for POST /api/judge/v1/score.

    This is the ACTUAL payload sent to the backend judge service.
    Used for type safety and documentation only.
    """

    # Pydantic v2 configuration. This module already relies on the v2 API
    # (model_validator); the v1-style nested ``class Config`` is deprecated
    # there and emits a deprecation warning at class-creation time.
    model_config = ConfigDict(extra="allow")  # Backend might add extra fields

    policy_name: str = Field(..., description="Name of the policy being evaluated")
    task_app: dict[str, Any] = Field(..., description="Task app metadata (id, base_url)")
    trace: dict[str, Any] = Field(..., description="Tracing v3 payload (event_history, metadata)")
    options: dict[str, Any] = Field(..., description="Judge options (provider, model, etc.)")
|
|
157
|
+
|
|
158
|
+
|
|
159
|
+
# Helper to convert to backend request format
|
|
160
|
+
|
|
161
|
+
def build_judge_http_options(
    options_config: JudgeOptionsConfig,
    *,
    rubric_overrides_from_task_info: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """
    Assemble the 'options' object of a backend judge HTTP request.

    ``provider``, ``model``, ``event`` and ``outcome`` are always present.
    ``rubric_id``, ``metadata`` and ``rubric_overrides`` are added only when
    they are non-empty, and ``timeout_s`` only when it is not ``None``.

    Args:
        options_config: Validated judge options from TOML
        rubric_overrides_from_task_info: Dynamic overrides fetched from TaskInfo;
            when non-empty they replace the static overrides of ``options_config``

    Returns:
        Dict ready to send in HTTP request payload
    """
    request_options: dict[str, Any] = {
        "provider": options_config.provider,
        "model": options_config.model,
        "event": options_config.event,
        "outcome": options_config.outcome,
    }

    rubric_id = options_config.rubric_id
    if rubric_id:
        request_options["rubric_id"] = rubric_id

    timeout_s = options_config.timeout_s
    if timeout_s is not None:
        request_options["timeout_s"] = timeout_s

    metadata = options_config.metadata
    if metadata:
        request_options["metadata"] = metadata

    # TaskInfo overrides win; the static config is only the fallback.
    overrides = rubric_overrides_from_task_info or options_config.rubric_overrides
    if overrides:
        request_options["rubric_overrides"] = overrides

    return request_options
|