synth-ai 0.2.12__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/agora_ex/README_MoE.md +224 -0
- examples/agora_ex/__init__.py +7 -0
- examples/agora_ex/agora_ex.py +65 -0
- examples/agora_ex/agora_ex_task_app.py +590 -0
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
- examples/agora_ex/reward_fn_grpo-human.py +129 -0
- examples/agora_ex/system_prompt_CURRENT.md +63 -0
- examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
- examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
- examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +7 -1
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +4 -2
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +4 -2
- examples/warming_up_to_rl/run_eval.py +127 -18
- examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +232 -193
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +49 -19
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +133 -0
- synth_ai/api/train/configs/sft.py +94 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/cli/demo.py +38 -39
- synth_ai/cli/rl_demo.py +81 -102
- synth_ai/cli/task_apps.py +3 -0
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +85 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/judge_schemas.py +127 -0
- synth_ai/rubrics/__init__.py +22 -0
- synth_ai/rubrics/validators.py +126 -0
- synth_ai/tracing_v3/serialization.py +130 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +1 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +48 -22
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.12.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0
synth_ai/cli/rl_demo.py
CHANGED
|
@@ -17,18 +17,25 @@ from __future__ import annotations
|
|
|
17
17
|
|
|
18
18
|
import click
|
|
19
19
|
|
|
20
|
+
from synth_ai.demos.core import cli as demo_commands
|
|
20
21
|
|
|
21
|
-
def _forward(args: list[str]) -> None:
|
|
22
|
-
import sys
|
|
23
22
|
|
|
23
|
+
def _run_demo_command(func, *args, **kwargs) -> None:
|
|
24
|
+
"""Invoke a demo command and exit via Click on non-zero status codes."""
|
|
25
|
+
|
|
26
|
+
try:
|
|
27
|
+
result = func(*args, **kwargs)
|
|
28
|
+
except SystemExit as exc: # pragma: no cover - defensive
|
|
29
|
+
raise click.exceptions.Exit(exc.code or 1) from exc
|
|
30
|
+
|
|
31
|
+
if result is None:
|
|
32
|
+
return
|
|
24
33
|
try:
|
|
25
|
-
|
|
26
|
-
except
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
if rc != 0:
|
|
31
|
-
sys.exit(rc)
|
|
34
|
+
code = int(result)
|
|
35
|
+
except (TypeError, ValueError):
|
|
36
|
+
return
|
|
37
|
+
if code != 0:
|
|
38
|
+
raise click.exceptions.Exit(code)
|
|
32
39
|
|
|
33
40
|
|
|
34
41
|
def register(cli):
|
|
@@ -44,7 +51,7 @@ def register(cli):
|
|
|
44
51
|
|
|
45
52
|
@_rlg.command("setup")
|
|
46
53
|
def rl_setup():
|
|
47
|
-
|
|
54
|
+
_run_demo_command(demo_commands.setup)
|
|
48
55
|
|
|
49
56
|
# (prepare command removed; consolidated into configure)
|
|
50
57
|
|
|
@@ -64,34 +71,29 @@ def register(cli):
|
|
|
64
71
|
help="Path to deploy_task_app.sh (optional legacy)",
|
|
65
72
|
)
|
|
66
73
|
def rl_deploy(local: bool, app: str | None, name: str, script: str | None):
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
if script:
|
|
75
|
-
args.extend(["--script", script])
|
|
76
|
-
_forward(args)
|
|
74
|
+
_run_demo_command(
|
|
75
|
+
demo_commands.deploy,
|
|
76
|
+
local=local,
|
|
77
|
+
app=app,
|
|
78
|
+
name=name,
|
|
79
|
+
script=script,
|
|
80
|
+
)
|
|
77
81
|
|
|
78
82
|
@_rlg.command("configure")
|
|
79
83
|
def rl_configure():
|
|
80
|
-
|
|
84
|
+
_run_demo_command(demo_commands.run)
|
|
81
85
|
|
|
82
86
|
@_rlg.command("init")
|
|
83
87
|
@click.option("--template", type=str, default=None, help="Template id to instantiate")
|
|
84
88
|
@click.option("--dest", type=click.Path(), default=None, help="Destination directory for files")
|
|
85
89
|
@click.option("--force", is_flag=True, help="Overwrite existing files in destination")
|
|
86
90
|
def rl_init(template: str | None, dest: str | None, force: bool):
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
args.append("--force")
|
|
94
|
-
_forward(args)
|
|
91
|
+
_run_demo_command(
|
|
92
|
+
demo_commands.init,
|
|
93
|
+
template=template,
|
|
94
|
+
dest=dest,
|
|
95
|
+
force=force,
|
|
96
|
+
)
|
|
95
97
|
|
|
96
98
|
@_rlg.command("run")
|
|
97
99
|
@click.option(
|
|
@@ -110,29 +112,24 @@ def register(cli):
|
|
|
110
112
|
timeout: int,
|
|
111
113
|
dry_run: bool,
|
|
112
114
|
):
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
if timeout is not None:
|
|
123
|
-
args.extend(["--timeout", str(timeout)])
|
|
124
|
-
if dry_run:
|
|
125
|
-
args.append("--dry-run")
|
|
126
|
-
_forward(args)
|
|
115
|
+
_run_demo_command(
|
|
116
|
+
demo_commands.run,
|
|
117
|
+
config=config,
|
|
118
|
+
batch_size=batch_size,
|
|
119
|
+
group_size=group_size,
|
|
120
|
+
model=model,
|
|
121
|
+
timeout=timeout,
|
|
122
|
+
dry_run=dry_run,
|
|
123
|
+
)
|
|
127
124
|
|
|
128
125
|
# Dotted aliases (top-level): legacy check → setup
|
|
129
126
|
@cli.command("rl_demo.check")
|
|
130
127
|
def rl_check_alias():
|
|
131
|
-
|
|
128
|
+
_run_demo_command(demo_commands.setup)
|
|
132
129
|
|
|
133
130
|
@cli.command("rl_demo.setup")
|
|
134
131
|
def rl_setup_alias():
|
|
135
|
-
|
|
132
|
+
_run_demo_command(demo_commands.setup)
|
|
136
133
|
|
|
137
134
|
# (prepare alias removed)
|
|
138
135
|
|
|
@@ -152,34 +149,29 @@ def register(cli):
|
|
|
152
149
|
help="Path to deploy_task_app.sh (optional legacy)",
|
|
153
150
|
)
|
|
154
151
|
def rl_deploy_alias(local: bool, app: str | None, name: str, script: str | None):
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
if script:
|
|
163
|
-
args.extend(["--script", script])
|
|
164
|
-
_forward(args)
|
|
152
|
+
_run_demo_command(
|
|
153
|
+
demo_commands.deploy,
|
|
154
|
+
local=local,
|
|
155
|
+
app=app,
|
|
156
|
+
name=name,
|
|
157
|
+
script=script,
|
|
158
|
+
)
|
|
165
159
|
|
|
166
160
|
@cli.command("rl_demo.configure")
|
|
167
161
|
def rl_configure_alias():
|
|
168
|
-
|
|
162
|
+
_run_demo_command(demo_commands.run)
|
|
169
163
|
|
|
170
164
|
@cli.command("rl_demo.init")
|
|
171
165
|
@click.option("--template", type=str, default=None, help="Template id to instantiate")
|
|
172
166
|
@click.option("--dest", type=click.Path(), default=None, help="Destination directory for files")
|
|
173
167
|
@click.option("--force", is_flag=True, help="Overwrite existing files in destination")
|
|
174
168
|
def rl_init_alias(template: str | None, dest: str | None, force: bool):
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
args.append("--force")
|
|
182
|
-
_forward(args)
|
|
169
|
+
_run_demo_command(
|
|
170
|
+
demo_commands.init,
|
|
171
|
+
template=template,
|
|
172
|
+
dest=dest,
|
|
173
|
+
force=force,
|
|
174
|
+
)
|
|
183
175
|
|
|
184
176
|
@cli.command("rl_demo.run")
|
|
185
177
|
@click.option(
|
|
@@ -198,20 +190,15 @@ def register(cli):
|
|
|
198
190
|
timeout: int,
|
|
199
191
|
dry_run: bool,
|
|
200
192
|
):
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if timeout is not None:
|
|
211
|
-
args.extend(["--timeout", str(timeout)])
|
|
212
|
-
if dry_run:
|
|
213
|
-
args.append("--dry-run")
|
|
214
|
-
_forward(args)
|
|
193
|
+
_run_demo_command(
|
|
194
|
+
demo_commands.run,
|
|
195
|
+
config=config,
|
|
196
|
+
batch_size=batch_size,
|
|
197
|
+
group_size=group_size,
|
|
198
|
+
model=model,
|
|
199
|
+
timeout=timeout,
|
|
200
|
+
dry_run=dry_run,
|
|
201
|
+
)
|
|
215
202
|
|
|
216
203
|
# Top-level convenience alias: `synth-ai demo-deploy`
|
|
217
204
|
@cli.command("demo-deploy")
|
|
@@ -230,16 +217,13 @@ def register(cli):
|
|
|
230
217
|
help="Path to deploy_task_app.sh (optional legacy)",
|
|
231
218
|
)
|
|
232
219
|
def deploy_demo(local: bool, app: str | None, name: str, script: str | None):
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
if script:
|
|
241
|
-
args.extend(["--script", script])
|
|
242
|
-
_forward(args)
|
|
220
|
+
_run_demo_command(
|
|
221
|
+
demo_commands.deploy,
|
|
222
|
+
local=local,
|
|
223
|
+
app=app,
|
|
224
|
+
name=name,
|
|
225
|
+
script=script,
|
|
226
|
+
)
|
|
243
227
|
|
|
244
228
|
@cli.command("run")
|
|
245
229
|
@click.option(
|
|
@@ -258,17 +242,12 @@ def register(cli):
|
|
|
258
242
|
timeout: int,
|
|
259
243
|
dry_run: bool,
|
|
260
244
|
):
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
if timeout is not None:
|
|
271
|
-
args.extend(["--timeout", str(timeout)])
|
|
272
|
-
if dry_run:
|
|
273
|
-
args.append("--dry-run")
|
|
274
|
-
_forward(args)
|
|
245
|
+
_run_demo_command(
|
|
246
|
+
demo_commands.run,
|
|
247
|
+
config=config,
|
|
248
|
+
batch_size=batch_size,
|
|
249
|
+
group_size=group_size,
|
|
250
|
+
model=model,
|
|
251
|
+
timeout=timeout,
|
|
252
|
+
dry_run=dry_run,
|
|
253
|
+
)
|
synth_ai/cli/task_apps.py
CHANGED
|
@@ -778,6 +778,9 @@ def _select_app_choice(app_id: str | None, purpose: str) -> AppChoice:
|
|
|
778
778
|
if not matches:
|
|
779
779
|
available = ", ".join(sorted({c.app_id for c in filtered}))
|
|
780
780
|
raise click.ClickException(f"Task app '{app_id}' not found. Available: {available}")
|
|
781
|
+
exact_matches = [c for c in matches if c.app_id == app_id]
|
|
782
|
+
if len(exact_matches) == 1:
|
|
783
|
+
return exact_matches[0]
|
|
781
784
|
if len(matches) == 1:
|
|
782
785
|
return matches[0]
|
|
783
786
|
# Prefer entries with modal support when required
|