synth-ai 0.2.16__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (192) hide show
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/blog_posts/pokemon_vl/README.md +98 -0
  3. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  5. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  6. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  7. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  8. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  9. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  12. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  13. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  15. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  16. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  17. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  18. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  20. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  21. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  22. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  23. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  24. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  25. examples/qwen_vl/README.md +10 -12
  26. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  27. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  28. examples/qwen_vl/collect_data_via_cli.md +76 -84
  29. examples/qwen_vl/collect_vision_traces.py +4 -4
  30. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  31. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  32. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  33. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  34. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  35. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  36. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  37. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  38. examples/qwen_vl/run_vision_comparison.sh +6 -7
  39. examples/rl/README.md +5 -5
  40. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  41. examples/rl/configs/rl_from_base_qwen17.toml +5 -2
  42. examples/rl/task_app/README.md +1 -2
  43. examples/rl/task_app/math_single_step.py +2 -2
  44. examples/run_crafter_demo.sh +2 -2
  45. examples/sft/README.md +1 -1
  46. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  47. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  48. examples/swe/task_app/README.md +32 -2
  49. examples/swe/task_app/grpo_swe_mini.py +4 -0
  50. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  51. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  52. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  53. examples/swe/task_app/morph_backend.py +178 -0
  54. examples/task_apps/crafter/task_app/README.md +1 -1
  55. examples/task_apps/crafter/task_app/grpo_crafter.py +66 -3
  56. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  57. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  58. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  59. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +17 -49
  60. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +13 -5
  61. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +15 -1
  62. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  63. examples/task_apps/math/README.md +1 -2
  64. examples/task_apps/pokemon_red/README.md +3 -4
  65. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  66. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  67. examples/task_apps/pokemon_red/task_app.py +36 -5
  68. examples/task_apps/sokoban/README.md +2 -3
  69. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  70. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  71. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  72. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  73. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  74. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -2
  75. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  76. examples/warming_up_to_rl/task_app/README.md +1 -1
  77. examples/warming_up_to_rl/task_app/grpo_crafter.py +134 -3
  78. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  79. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  80. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  81. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +4 -4
  82. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +6 -3
  83. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  84. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  85. synth_ai/api/train/builders.py +9 -3
  86. synth_ai/api/train/cli.py +125 -10
  87. synth_ai/api/train/configs/__init__.py +8 -1
  88. synth_ai/api/train/configs/rl.py +32 -7
  89. synth_ai/api/train/configs/sft.py +6 -2
  90. synth_ai/api/train/configs/shared.py +59 -2
  91. synth_ai/auth/credentials.py +119 -0
  92. synth_ai/cli/__init__.py +12 -4
  93. synth_ai/cli/commands/__init__.py +17 -0
  94. synth_ai/cli/commands/demo/__init__.py +6 -0
  95. synth_ai/cli/commands/demo/core.py +163 -0
  96. synth_ai/cli/commands/deploy/__init__.py +23 -0
  97. synth_ai/cli/commands/deploy/core.py +614 -0
  98. synth_ai/cli/commands/deploy/errors.py +72 -0
  99. synth_ai/cli/commands/deploy/validation.py +11 -0
  100. synth_ai/cli/commands/eval/__init__.py +19 -0
  101. synth_ai/cli/commands/eval/core.py +1109 -0
  102. synth_ai/cli/commands/eval/errors.py +81 -0
  103. synth_ai/cli/commands/eval/validation.py +133 -0
  104. synth_ai/cli/commands/filter/__init__.py +12 -0
  105. synth_ai/cli/commands/filter/core.py +388 -0
  106. synth_ai/cli/commands/filter/errors.py +55 -0
  107. synth_ai/cli/commands/filter/validation.py +77 -0
  108. synth_ai/cli/commands/help/__init__.py +177 -0
  109. synth_ai/cli/commands/help/core.py +73 -0
  110. synth_ai/cli/commands/status/__init__.py +64 -0
  111. synth_ai/cli/commands/status/client.py +192 -0
  112. synth_ai/cli/commands/status/config.py +92 -0
  113. synth_ai/cli/commands/status/errors.py +20 -0
  114. synth_ai/cli/commands/status/formatters.py +164 -0
  115. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  116. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  117. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  118. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  119. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  120. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  121. synth_ai/cli/commands/status/utils.py +114 -0
  122. synth_ai/cli/commands/train/__init__.py +53 -0
  123. synth_ai/cli/commands/train/core.py +21 -0
  124. synth_ai/cli/commands/train/errors.py +117 -0
  125. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  126. synth_ai/cli/commands/train/judge_validation.py +304 -0
  127. synth_ai/cli/commands/train/validation.py +443 -0
  128. synth_ai/cli/demo.py +2 -162
  129. synth_ai/cli/deploy/__init__.py +28 -0
  130. synth_ai/cli/deploy/core.py +5 -0
  131. synth_ai/cli/deploy/errors.py +23 -0
  132. synth_ai/cli/deploy/validation.py +5 -0
  133. synth_ai/cli/eval/__init__.py +36 -0
  134. synth_ai/cli/eval/core.py +5 -0
  135. synth_ai/cli/eval/errors.py +31 -0
  136. synth_ai/cli/eval/validation.py +5 -0
  137. synth_ai/cli/filter/__init__.py +28 -0
  138. synth_ai/cli/filter/core.py +5 -0
  139. synth_ai/cli/filter/errors.py +23 -0
  140. synth_ai/cli/filter/validation.py +5 -0
  141. synth_ai/cli/modal_serve/__init__.py +12 -0
  142. synth_ai/cli/modal_serve/core.py +14 -0
  143. synth_ai/cli/modal_serve/errors.py +8 -0
  144. synth_ai/cli/modal_serve/validation.py +11 -0
  145. synth_ai/cli/serve/__init__.py +12 -0
  146. synth_ai/cli/serve/core.py +14 -0
  147. synth_ai/cli/serve/errors.py +8 -0
  148. synth_ai/cli/serve/validation.py +11 -0
  149. synth_ai/cli/setup.py +20 -265
  150. synth_ai/cli/status.py +7 -126
  151. synth_ai/cli/task_app_deploy.py +1 -10
  152. synth_ai/cli/task_app_modal_serve.py +4 -9
  153. synth_ai/cli/task_app_serve.py +4 -11
  154. synth_ai/cli/task_apps.py +58 -1487
  155. synth_ai/cli/train/__init__.py +12 -0
  156. synth_ai/cli/train/core.py +21 -0
  157. synth_ai/cli/train/errors.py +8 -0
  158. synth_ai/cli/train/validation.py +24 -0
  159. synth_ai/cli/train.py +1 -14
  160. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  161. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  162. synth_ai/environments/examples/red/engine.py +33 -12
  163. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  164. synth_ai/environments/examples/red/environment.py +26 -0
  165. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  166. synth_ai/http.py +12 -0
  167. synth_ai/judge_schemas.py +10 -11
  168. synth_ai/learning/rl/client.py +3 -1
  169. synth_ai/streaming/__init__.py +29 -0
  170. synth_ai/streaming/config.py +94 -0
  171. synth_ai/streaming/handlers.py +469 -0
  172. synth_ai/streaming/streamer.py +301 -0
  173. synth_ai/streaming/types.py +95 -0
  174. synth_ai/task/validators.py +2 -2
  175. synth_ai/tracing_v3/migration_helper.py +1 -2
  176. synth_ai/utils/env.py +25 -18
  177. synth_ai/utils/http.py +4 -1
  178. synth_ai/utils/modal.py +2 -2
  179. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/METADATA +8 -3
  180. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/RECORD +184 -109
  181. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  182. synth_ai/cli/tui.py +0 -62
  183. synth_ai/tui/__init__.py +0 -5
  184. synth_ai/tui/__main__.py +0 -13
  185. synth_ai/tui/cli/__init__.py +0 -1
  186. synth_ai/tui/cli/query_experiments.py +0 -164
  187. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  188. synth_ai/tui/dashboard.py +0 -911
  189. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  190. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  191. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  192. {synth_ai-0.2.16.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,81 @@
1
+ """`synth runs` command group."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+ import click
8
+
9
+ from ..client import StatusAPIClient
10
+ from ..errors import StatusAPIError
11
+ from ..formatters import console, events_panel, print_json, runs_table
12
+ from ..utils import bail, common_options, parse_relative_time, resolve_context_config
13
+
14
+
15
@click.group("runs", help="Inspect individual job runs/attempts.")
@click.pass_context
def runs_group(ctx: click.Context) -> None:  # pragma: no cover - Click wiring
    # Guarantee that the Click context carries a dict as its shared object.
    shared_object_type = dict
    ctx.ensure_object(shared_object_type)
19
+
20
+
21
@runs_group.command("list", help="List the runs recorded for a job.")
@common_options()
@click.argument("job_id")
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Print the result as JSON instead of a table.",
)
@click.pass_context
def list_runs(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    job_id: str,
    output_json: bool,
) -> None:
    """Fetch the runs of ``job_id`` and print them.

    Args:
        ctx: Click context, used to look up a shared backend configuration.
        base_url: Optional backend base URL override.
        api_key: Optional API key override.
        timeout: HTTP request timeout in seconds.
        job_id: Identifier of the job whose runs are listed.
        output_json: When set, print via ``print_json``; otherwise render
            ``runs_table`` on the shared console.

    Raises:
        click.ClickException: Raised through ``bail`` when the client
            reports a ``StatusAPIError``.
    """
    cfg = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)

    async def _run() -> None:
        try:
            async with StatusAPIClient(cfg) as client:
                runs = await client.list_job_runs(job_id)
                if output_json:
                    print_json(runs)
                else:
                    console.print(runs_table(runs))
        except StatusAPIError as exc:
            # Convert backend failures into a regular Click error message.
            bail(f"Backend error: {exc}")

    asyncio.run(_run())
48
+
49
+
50
@runs_group.command("logs", help="Show the events recorded for one run of a job.")
@common_options()
@click.argument("job_id")
@click.option("--run", "run_id", required=True, help="Run identifier (number or ID) to inspect.")
@click.option("--since", help="Filter events after the supplied timestamp/relative offset.")
@click.option(
    "--json",
    "output_json",
    is_flag=True,
    help="Print the result as JSON instead of a panel.",
)
@click.pass_context
def run_logs(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    job_id: str,
    run_id: str,
    since: str | None,
    output_json: bool,
) -> None:
    """Fetch the events of one run of ``job_id`` and print them.

    Args:
        ctx: Click context, used to look up a shared backend configuration.
        base_url: Optional backend base URL override.
        api_key: Optional API key override.
        timeout: HTTP request timeout in seconds.
        job_id: Identifier of the job that owns the run.
        run_id: Identifier of the run whose events are requested.
        since: Optional timestamp or relative offset; it is normalised with
            ``parse_relative_time`` before being sent to the client.
        output_json: When set, print via ``print_json``; otherwise render
            ``events_panel`` on the shared console.

    Raises:
        click.ClickException: Raised through ``bail`` when the client
            reports a ``StatusAPIError``.
    """
    cfg = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)
    since_filter = parse_relative_time(since)

    async def _run() -> None:
        try:
            async with StatusAPIClient(cfg) as client:
                events = await client.get_job_events(job_id, since=since_filter, run_id=run_id)
                if output_json:
                    print_json(events)
                else:
                    console.print(events_panel(events))
        except StatusAPIError as exc:
            # Convert backend failures into a regular Click error message.
            bail(f"Backend error: {exc}")

    asyncio.run(_run())
@@ -0,0 +1,47 @@
1
+ """`synth status summary` command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+
7
+ import click
8
+
9
+ from ..client import StatusAPIClient
10
+ from ..errors import StatusAPIError
11
+ from ..formatters import console, files_table, jobs_table, models_table
12
+ from ..utils import common_options, resolve_context_config
13
+
14
+
15
@click.command("summary", help="Show a condensed overview of recent jobs, models, and files.")
@common_options()
@click.option("--limit", default=5, show_default=True, type=int, help="Rows per section.")
@click.pass_context
def summary_command(
    ctx: click.Context,
    base_url: str | None,
    api_key: str | None,
    timeout: float,
    limit: int,
) -> None:
    """Print jobs, models and files tables, each capped at ``limit`` rows.

    Every section is fetched independently. A ``StatusAPIError`` in one
    section no longer fails silently: a warning is printed and that section
    is rendered as an empty table, so the remaining sections still show up
    and a backend failure is distinguishable from "no data".

    Args:
        ctx: Click context, used to look up a shared backend configuration.
        base_url: Optional backend base URL override.
        api_key: Optional API key override.
        timeout: HTTP request timeout in seconds.
        limit: Maximum number of rows requested and displayed per section.
    """
    cfg = resolve_context_config(ctx, base_url=base_url, api_key=api_key, timeout=timeout)

    async def _run() -> tuple[list[dict[str, object]], list[dict[str, object]], list[dict[str, object]]]:
        async def _fetch(label, fetch):
            # Best-effort fetch of one section: report the failure, then
            # fall back to an empty list so the overview is still printed.
            try:
                return await fetch(limit=limit)
            except StatusAPIError as exc:
                console.print(f"[yellow]Could not load {label}: {exc}[/yellow]")
                return []

        async with StatusAPIClient(cfg) as client:
            jobs = await _fetch("jobs", client.list_jobs)
            models = await _fetch("models", client.list_models)
            files = await _fetch("files", client.list_files)
            return jobs, models, files

    jobs, models, files = asyncio.run(_run())
    # Slice again locally in case the backend returned more rows than asked.
    console.print(jobs_table(jobs[:limit]))
    console.print(models_table(models[:limit]))
    console.print(files_table(files[:limit]))
@@ -0,0 +1,114 @@
1
+ """Shared utilities for status commands."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import asyncio
6
+ from collections.abc import Callable, Coroutine
7
+ from datetime import UTC, datetime, timedelta
8
+ from typing import Any, TypeVar
9
+
10
+ import click
11
+ from rich.console import Console
12
+
13
+ from .config import DEFAULT_TIMEOUT, BackendConfig, resolve_backend_config
14
+
15
+ T = TypeVar("T")
16
+
17
+ console = Console()
18
+
19
+
20
+ def parse_relative_time(value: str | None) -> str | None:
21
+ """Convert relative time expressions (e.g., '5m', '2h', '1d') to ISO strings."""
22
+ if not value:
23
+ return None
24
+ token = value.strip().lower()
25
+ if not token:
26
+ return None
27
+ multiplier = 1.0
28
+ if token.endswith("ms"):
29
+ multiplier = 0.001
30
+ token = token[:-2]
31
+ elif token.endswith("s"):
32
+ multiplier = 1.0
33
+ token = token[:-1]
34
+ elif token.endswith("m"):
35
+ multiplier = 60.0
36
+ token = token[:-1]
37
+ elif token.endswith("h"):
38
+ multiplier = 3600.0
39
+ token = token[:-1]
40
+ elif token.endswith("d"):
41
+ multiplier = 86400.0
42
+ token = token[:-1]
43
+
44
+ try:
45
+ seconds = float(token) * multiplier
46
+ except ValueError:
47
+ return value
48
+
49
+ dt = datetime.now(UTC) - timedelta(seconds=seconds)
50
+ return dt.isoformat()
51
+
52
+
53
def ensure_async(fn: Callable[..., Coroutine[Any, Any, T]]) -> Callable[..., T]:
    """Decorator to run an async callable via asyncio.run inside Click commands.

    Args:
        fn: Coroutine function to expose as a synchronous callable.

    Returns:
        A synchronous wrapper that forwards its arguments to ``fn``, runs the
        resulting coroutine with ``asyncio.run`` and returns its result. The
        wrapper keeps ``fn``'s name, docstring and signature metadata.
    """
    # Imported locally so this helper does not depend on a module-level import.
    import functools

    # Preserve __name__/__doc__: without this every decorated function would
    # be called "wrapper" and lose its docstring.
    @functools.wraps(fn)
    def wrapper(*args: Any, **kwargs: Any) -> T:
        return asyncio.run(fn(*args, **kwargs))

    return wrapper
60
+
61
+
62
def resolve_context_config(
    ctx: click.Context,
    *,
    base_url: str | None,
    api_key: str | None,
    timeout: float | None,
) -> BackendConfig:
    """Pick the backend configuration a status command should use.

    When no explicit override is given (no base URL, no API key, and a
    timeout that is ``None`` or ``DEFAULT_TIMEOUT``), a ``BackendConfig``
    stored under ``"status_backend_config"`` in the context's dict object is
    reused if present. In every other case a configuration is resolved from
    the supplied values.
    """
    has_override = (
        base_url is not None
        or api_key is not None
        or timeout not in (None, DEFAULT_TIMEOUT)
    )
    if not has_override:
        shared = ctx.find_object(dict)
        if shared:
            cached = shared.get("status_backend_config")
            if isinstance(cached, BackendConfig):
                return cached
    return resolve_backend_config(base_url=base_url, api_key=api_key, timeout=timeout)
75
+
76
+
77
def warn(message: str) -> None:
    """Print ``message`` on the shared console wrapped in yellow markup."""
    styled = f"[yellow]{message}[/yellow]"
    console.print(styled)
79
+
80
+
81
def bail(message: str) -> None:
    """Abort the current command by raising ``click.ClickException(message)``."""
    failure = click.ClickException(message)
    raise failure
83
+
84
+
85
def common_options() -> Callable[[Callable[..., T]], Callable[..., T]]:
    """Build a decorator that adds the shared backend options to a command.

    The decorated function receives ``base_url``, ``api_key`` and ``timeout``
    from the ``--base-url``, ``--api-key`` and ``--timeout`` options.
    """

    def decorator(func: Callable[..., T]) -> Callable[..., T]:
        # Options are applied innermost-first: timeout, then api-key, then
        # base-url as the outermost decorator.
        with_timeout = click.option(
            "--timeout",
            default=DEFAULT_TIMEOUT,
            show_default=True,
            type=float,
            help="HTTP request timeout in seconds.",
        )(func)
        with_api_key = click.option(
            "--api-key",
            envvar="SYNTH_STATUS_API_KEY",
            default=None,
            help="API key for the Synth backend.",
        )(with_timeout)
        return click.option(
            "--base-url",
            envvar="SYNTH_STATUS_BASE_URL",
            default=None,
            help="Override the Synth backend base URL for this command.",
        )(with_api_key)

    return decorator
@@ -0,0 +1,53 @@
1
+ from .core import register, train_command
2
+ from .errors import (
3
+ InvalidJudgeConfigError,
4
+ InvalidRubricConfigError,
5
+ TrainCliError,
6
+ )
7
+ from .judge_schemas import (
8
+ JudgeConfig,
9
+ JudgeOptionsConfig,
10
+ JudgeRequestPayload,
11
+ RubricConfig,
12
+ RubricWeightsConfig,
13
+ build_judge_http_options,
14
+ )
15
+ from .judge_validation import (
16
+ check_for_deprecated_fields,
17
+ extract_and_validate_judge_rubric,
18
+ validate_judge_config,
19
+ validate_rubric_config,
20
+ )
21
+ from .validation import (
22
+ load_and_validate_rl,
23
+ load_and_validate_sft,
24
+ validate_rl_config,
25
+ validate_sft_config,
26
+ )
27
+
28
+ __all__ = [
29
+ # Core
30
+ "register",
31
+ "train_command",
32
+ # Errors
33
+ "TrainCliError",
34
+ "InvalidJudgeConfigError",
35
+ "InvalidRubricConfigError",
36
+ # SFT/RL validation
37
+ "validate_sft_config",
38
+ "validate_rl_config",
39
+ "load_and_validate_sft",
40
+ "load_and_validate_rl",
41
+ # Judge/Rubric schemas
42
+ "RubricWeightsConfig",
43
+ "RubricConfig",
44
+ "JudgeOptionsConfig",
45
+ "JudgeConfig",
46
+ "JudgeRequestPayload",
47
+ "build_judge_http_options",
48
+ # Judge/Rubric validation
49
+ "validate_rubric_config",
50
+ "validate_judge_config",
51
+ "extract_and_validate_judge_rubric",
52
+ "check_for_deprecated_fields",
53
+ ]
@@ -0,0 +1,21 @@
1
+ from __future__ import annotations
2
+
3
+ import click
4
+ from synth_ai.api.train.cli import (
5
+ register as _register_with_cli,
6
+ )
7
+ from synth_ai.api.train.cli import (
8
+ train_command as _train_command,
9
+ )
10
+
11
+ __all__ = ["register", "train_command"]
12
+
13
+
14
def register(cli: click.Group) -> None:
    """Register the train command on ``cli`` by delegating to ``synth_ai.api.train.cli.register``."""
    _register_with_cli(cli)
    return None
17
+
18
+
19
def train_command(*args, **kwargs):
    """Forward every argument to ``synth_ai.api.train.cli.train_command`` and return its result."""
    outcome = _train_command(*args, **kwargs)
    return outcome
@@ -0,0 +1,117 @@
1
+ from dataclasses import dataclass
2
+
3
+
4
class TrainCliError(RuntimeError):
    """Common root of every exception raised by the train CLI."""

    pass
6
+
7
+
8
@dataclass(slots=True)
class TomlParseError(TrainCliError):
    """Raised when TOML file cannot be parsed."""

    # Path of the file that failed to parse.
    path: str
    # Description of the parse failure.
    detail: str

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        return f"{self.path}: {self.detail}"
13
+
14
+
15
@dataclass(slots=True)
class ConfigNotFoundError(TrainCliError):
    """Raised when config file is not found."""

    # Path that was looked up.
    path: str

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        return f"Config file not found: {self.path}"
19
+
20
+
21
@dataclass(slots=True)
class InvalidSFTConfigError(TrainCliError):
    """Raised when SFT configuration is invalid."""

    # Description of what is wrong with the configuration.
    detail: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.hint:
            return f"{self.detail} (hint: {self.hint})"
        return self.detail
26
+
27
+
28
@dataclass(slots=True)
class InvalidRLConfigError(TrainCliError):
    """Raised when RL configuration is invalid."""

    # Description of what is wrong with the configuration.
    detail: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.hint:
            return f"{self.detail} (hint: {self.hint})"
        return self.detail
33
+
34
+
35
@dataclass(slots=True)
class MissingAlgorithmError(TrainCliError):
    """Raised when [algorithm] section is missing or invalid."""

    # Description of what is missing or invalid.
    detail: str

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        return self.detail
39
+
40
+
41
@dataclass(slots=True)
class MissingModelError(TrainCliError):
    """Raised when model specification is missing."""

    # Description of what is missing.
    detail: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.hint:
            return f"{self.detail} (hint: {self.hint})"
        return self.detail
46
+
47
+
48
@dataclass(slots=True)
class MissingDatasetError(TrainCliError):
    """Raised when dataset path is missing for SFT."""

    # Description of what is missing.
    detail: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.hint:
            return f"{self.detail} (hint: {self.hint})"
        return self.detail
53
+
54
+
55
@dataclass(slots=True)
class MissingComputeError(TrainCliError):
    """Raised when compute configuration is missing or incomplete."""

    # Description of what is missing or incomplete.
    detail: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.hint:
            return f"{self.detail} (hint: {self.hint})"
        return self.detail
60
+
61
+
62
@dataclass(slots=True)
class UnsupportedAlgorithmError(TrainCliError):
    """Raised when algorithm type is not supported."""

    # Algorithm type that was supplied.
    algorithm_type: str
    # Description of the algorithm type(s) that would have been accepted.
    expected: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        message = (
            f"Unsupported algorithm type {self.algorithm_type!r}; "
            f"expected {self.expected}"
        )
        if self.hint:
            return f"{message} (hint: {self.hint})"
        return message
68
+
69
+
70
@dataclass(slots=True)
class InvalidHyperparametersError(TrainCliError):
    """Raised when hyperparameters are invalid."""

    # Description of what is wrong.
    detail: str
    # Name of the offending hyperparameter, when known.
    parameter: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.parameter:
            return f"{self.parameter}: {self.detail}"
        return self.detail
75
+
76
+
77
@dataclass(slots=True)
class InvalidTopologyError(TrainCliError):
    """Raised when topology configuration is invalid."""

    # Description of what is wrong with the topology.
    detail: str
    # Optional suggestion for fixing the problem.
    hint: str | None = None

    def __str__(self) -> str:
        # The dataclass-generated __init__ never forwards its arguments to
        # RuntimeError, so without this the message is empty whenever the
        # error is built with keyword arguments.
        if self.hint:
            return f"{self.detail} (hint: {self.hint})"
        return self.detail
82
+
83
+
84
@dataclass(slots=True)
class InvalidJudgeConfigError(TrainCliError):
    """Signals that the judge configuration did not pass validation."""

    # Explanation of the failure; also used as the exception message.
    detail: str

    def __str__(self) -> str:
        """Render the exception as its ``detail`` text."""
        message = self.detail
        return message
91
+
92
+
93
@dataclass(slots=True)
class InvalidRubricConfigError(TrainCliError):
    """Signals that the rubric configuration did not pass validation."""

    # Explanation of the failure; also used as the exception message.
    detail: str

    def __str__(self) -> str:
        """Render the exception as its ``detail`` text."""
        message = self.detail
        return message
100
+
101
+
102
+ __all__ = [
103
+ "TrainCliError",
104
+ "TomlParseError",
105
+ "ConfigNotFoundError",
106
+ "InvalidSFTConfigError",
107
+ "InvalidRLConfigError",
108
+ "MissingAlgorithmError",
109
+ "MissingModelError",
110
+ "MissingDatasetError",
111
+ "MissingComputeError",
112
+ "UnsupportedAlgorithmError",
113
+ "InvalidHyperparametersError",
114
+ "InvalidTopologyError",
115
+ "InvalidJudgeConfigError",
116
+ "InvalidRubricConfigError",
117
+ ]
@@ -0,0 +1,199 @@
1
+ """
2
+ Pydantic schemas for judge/rubric configuration.
3
+
4
+ These models define the ACTUAL fields used by the backend judge service,
5
+ with all dead code removed. This is the single source of truth for what
6
+ gets sent in HTTP requests.
7
+ """
8
+
9
+ from __future__ import annotations
10
+
11
+ from typing import Any, Optional
12
+
13
+ from pydantic import Field, model_validator
14
+ from synth_ai.api.train.configs.shared import ExtraModel
15
+
16
# Public API of this module. ``build_judge_http_options`` is listed too
# because it is a public helper defined below.
__all__ = [
    "RubricWeightsConfig",
    "RubricConfig",
    "JudgeOptionsConfig",
    "JudgeConfig",
    "JudgeRequestPayload",
    "build_judge_http_options",
]
23
+
24
+
25
class RubricWeightsConfig(ExtraModel):
    """
    Reward blending weights (client-side only, not sent to backend).

    These weights control how env rewards, event judge scores, and outcome
    judge scores are combined into a final reward signal for policy gradients.

    Formula:
    total_reward = (env * env_return) + (event * sum(event_scores)) + (outcome * outcome_score)
    """

    # Each weight must be >= 0; only ``env`` is active by default.
    env: float = Field(
        default=1.0,
        ge=0.0,
        description="Weight for environment rewards (task app native rewards)",
    )
    event: float = Field(
        default=0.0,
        ge=0.0,
        description="Weight for per-event judge scores (step-level judging)",
    )
    outcome: float = Field(
        default=0.0,
        ge=0.0,
        description="Weight for outcome judge score (episode-level judging)",
    )

    @model_validator(mode="after")
    def _validate_weights_sum(self) -> RubricWeightsConfig:
        """Ensure at least one weight is non-zero."""
        weights = (self.env, self.event, self.outcome)
        if not any(weight != 0.0 for weight in weights):
            raise ValueError("At least one reward weight must be non-zero")
        return self
57
+
58
+
59
class RubricConfig(ExtraModel):
    """
    Top-level rubric configuration.

    Controls whether rubric-based judging is enabled and how rewards are blended.
    """

    # Disabled unless switched on explicitly.
    enabled: bool = Field(
        description="Master switch for rubric-based judging",
        default=False,
    )
    # A fresh RubricWeightsConfig (with its own defaults) is built per instance.
    weights: RubricWeightsConfig = Field(
        description="Reward blending weights (env/event/outcome)",
        default_factory=RubricWeightsConfig,
    )
73
+
74
+
75
class JudgeOptionsConfig(ExtraModel):
    """
    Judge provider options (sent to backend in HTTP request).

    These fields are sent in the "options" object of the judge score request.
    All fields here map directly to the backend JudgeOptions schema.
    """

    # Required fields.
    provider: str = Field(
        ...,
        pattern=r"^(openai|groq|gemini)$",
        description="Judge provider type ('openai', 'groq', 'gemini')",
    )
    model: str = Field(
        ...,
        min_length=1,
        description="Model identifier (e.g., 'openai/gpt-oss-120b', 'gpt-5')",
    )

    # Optional fields.
    rubric_id: Optional[str] = Field(
        description="Base rubric identifier (e.g., 'crafter/bundle@v1')",
        default=None,
    )
    event: bool = Field(
        description="Enable per-event (step-level) judging",
        default=True,
    )
    outcome: bool = Field(
        description="Enable outcome (episode-level) judging",
        default=True,
    )
    timeout_s: Optional[float] = Field(
        description="Request timeout in seconds",
        default=None,
        gt=0,
    )
    metadata: dict[str, Any] = Field(
        description="Optional metadata (e.g., {'async': true, 'custom_field': 'value'})",
        default_factory=dict,
    )
    rubric_overrides: dict[str, Any] = Field(
        description=(
            "Static rubric criteria overrides (rarely used - TaskInfo overrides take priority). "
            "Format: {'event': {'criteria': [...]}, 'outcome': {'criteria': [...]}}"
        ),
        default_factory=dict,
    )

    @model_validator(mode="after")
    def _validate_at_least_one_enabled(self) -> JudgeOptionsConfig:
        """Ensure at least one judging type is enabled."""
        if not (self.event or self.outcome):
            raise ValueError("At least one of 'event' or 'outcome' must be enabled")
        return self
127
+
128
+
129
class JudgeConfig(ExtraModel):
    """
    Top-level judge configuration.

    This is parsed from TOML [judge] section and contains all judge-related settings.
    """

    # Required: there is no default for the provider options.
    options: JudgeOptionsConfig = Field(
        default=...,
        description="Judge provider options (sent to backend)",
    )
139
+
140
+
141
+ # HTTP Request Payload Structures (for documentation/type safety)
142
+
143
class JudgeRequestPayload(ExtraModel):
    """
    HTTP request payload structure for POST /api/judge/v1/score.

    This is the ACTUAL payload sent to the backend judge service.
    Used for type safety and documentation only.
    """

    # Pydantic v2 configuration; replaces the deprecated inner ``Config``
    # class. Unknown fields are accepted because the backend might add
    # extra fields.
    model_config = {"extra": "allow"}

    policy_name: str = Field(..., description="Name of the policy being evaluated")
    task_app: dict[str, Any] = Field(..., description="Task app metadata (id, base_url)")
    trace: dict[str, Any] = Field(..., description="Tracing v3 payload (event_history, metadata)")
    options: dict[str, Any] = Field(..., description="Judge options (provider, model, etc.)")
157
+
158
+
159
+ # Helper to convert to backend request format
160
+
161
def build_judge_http_options(
    options_config: JudgeOptionsConfig,
    *,
    rubric_overrides_from_task_info: Optional[dict[str, Any]] = None,
) -> dict[str, Any]:
    """
    Assemble the 'options' mapping for a judge request.

    ``provider``, ``model``, ``event`` and ``outcome`` are always included.
    ``rubric_id`` and ``metadata`` are added only when truthy, and
    ``timeout_s`` only when it is not ``None``.

    Args:
        options_config: Judge options to translate.
        rubric_overrides_from_task_info: Overrides that win over
            ``options_config.rubric_overrides`` when non-empty.

    Returns:
        A new dict holding the selected options.
    """
    cfg = options_config
    request_options: dict[str, Any] = dict(
        provider=cfg.provider,
        model=cfg.model,
        event=cfg.event,
        outcome=cfg.outcome,
    )

    if cfg.rubric_id:
        request_options["rubric_id"] = cfg.rubric_id
    if cfg.timeout_s is not None:
        request_options["timeout_s"] = cfg.timeout_s
    if cfg.metadata:
        request_options["metadata"] = cfg.metadata

    # Dynamic overrides are preferred; the static ones are only a fallback.
    overrides = rubric_overrides_from_task_info or cfg.rubric_overrides
    if overrides:
        request_options["rubric_overrides"] = overrides

    return request_options