synth-ai 0.2.10__py3-none-any.whl → 0.2.13.dev1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/agora_ex/README_MoE.md +224 -0
- examples/agora_ex/__init__.py +7 -0
- examples/agora_ex/agora_ex.py +65 -0
- examples/agora_ex/agora_ex_task_app.py +590 -0
- examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +121 -0
- examples/agora_ex/reward_fn_grpo-human.py +129 -0
- examples/agora_ex/system_prompt_CURRENT.md +63 -0
- examples/agora_ex/task_app/agora_ex_task_app.py +590 -0
- examples/agora_ex/task_app/reward_fn_grpo-human.py +129 -0
- examples/agora_ex/task_app/system_prompt_CURRENT.md +63 -0
- examples/multi_step/configs/crafter_rl_outcome.toml +74 -0
- examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +175 -0
- examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +83 -0
- examples/multi_step/configs/crafter_rl_stepwise_simple.toml +78 -0
- examples/multi_step/crafter_rl_lora.md +51 -10
- examples/multi_step/sse_metrics_streaming_notes.md +357 -0
- examples/multi_step/task_app_config_notes.md +494 -0
- examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
- examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
- examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
- examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
- examples/warming_up_to_rl/run_eval.py +267 -41
- examples/warming_up_to_rl/task_app/grpo_crafter.py +3 -33
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +109 -45
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +42 -46
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +376 -193
- synth_ai/__init__.py +41 -1
- synth_ai/api/train/builders.py +74 -33
- synth_ai/api/train/cli.py +29 -6
- synth_ai/api/train/configs/__init__.py +44 -0
- synth_ai/api/train/configs/rl.py +133 -0
- synth_ai/api/train/configs/sft.py +94 -0
- synth_ai/api/train/configs/shared.py +24 -0
- synth_ai/api/train/env_resolver.py +18 -19
- synth_ai/api/train/supported_algos.py +8 -5
- synth_ai/api/train/utils.py +6 -1
- synth_ai/cli/__init__.py +4 -2
- synth_ai/cli/_storage.py +19 -0
- synth_ai/cli/balance.py +14 -2
- synth_ai/cli/calc.py +37 -22
- synth_ai/cli/demo.py +38 -39
- synth_ai/cli/legacy_root_backup.py +12 -14
- synth_ai/cli/recent.py +12 -7
- synth_ai/cli/rl_demo.py +81 -102
- synth_ai/cli/status.py +4 -3
- synth_ai/cli/task_apps.py +146 -137
- synth_ai/cli/traces.py +4 -3
- synth_ai/cli/watch.py +3 -2
- synth_ai/demos/core/cli.py +121 -159
- synth_ai/environments/examples/crafter_classic/environment.py +16 -0
- synth_ai/evals/__init__.py +15 -0
- synth_ai/evals/client.py +85 -0
- synth_ai/evals/types.py +42 -0
- synth_ai/jobs/client.py +15 -3
- synth_ai/judge_schemas.py +127 -0
- synth_ai/rubrics/__init__.py +22 -0
- synth_ai/rubrics/validators.py +126 -0
- synth_ai/task/server.py +14 -7
- synth_ai/tracing_v3/decorators.py +51 -26
- synth_ai/tracing_v3/examples/basic_usage.py +12 -7
- synth_ai/tracing_v3/llm_call_record_helpers.py +107 -53
- synth_ai/tracing_v3/replica_sync.py +8 -4
- synth_ai/tracing_v3/serialization.py +130 -0
- synth_ai/tracing_v3/storage/utils.py +11 -9
- synth_ai/tracing_v3/turso/__init__.py +12 -0
- synth_ai/tracing_v3/turso/daemon.py +2 -1
- synth_ai/tracing_v3/turso/native_manager.py +28 -15
- {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/METADATA +4 -2
- {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/RECORD +73 -40
- {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/entry_points.txt +0 -1
- {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.10.dist-info → synth_ai-0.2.13.dev1.dist-info}/top_level.txt +0 -0
synth_ai/demos/core/cli.py
CHANGED
|
@@ -1,6 +1,5 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
-
import argparse
|
|
4
3
|
import contextlib
|
|
5
4
|
import json
|
|
6
5
|
import os
|
|
@@ -45,7 +44,7 @@ def _is_modal_public_url(u: str) -> bool:
|
|
|
45
44
|
return False
|
|
46
45
|
|
|
47
46
|
|
|
48
|
-
def
|
|
47
|
+
def setup() -> int:
|
|
49
48
|
# Change to demo directory if stored
|
|
50
49
|
demo_dir = demo_core.load_demo_dir()
|
|
51
50
|
if demo_dir and os.path.isdir(demo_dir):
|
|
@@ -760,7 +759,9 @@ def _ensure_task_app_ready(env: DemoEnv, synth_key: str, *, label: str) -> DemoE
|
|
|
760
759
|
return updated_env
|
|
761
760
|
|
|
762
761
|
|
|
763
|
-
def cmd_deploy(args: argparse.Namespace) -> int:
|
|
762
|
+
def deploy(
|
|
763
|
+
local: bool = False, app: str | None = None, name: str | None = None, script: str | None = None
|
|
764
|
+
) -> int:
|
|
764
765
|
# Change to demo directory if stored
|
|
765
766
|
demo_dir = demo_core.load_demo_dir()
|
|
766
767
|
if demo_dir and os.path.isdir(demo_dir):
|
|
@@ -774,7 +775,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
774
775
|
url = ""
|
|
775
776
|
app_name = env.task_app_name or ""
|
|
776
777
|
try:
|
|
777
|
-
if
|
|
778
|
+
if local:
|
|
778
779
|
print("Starting local Task App…")
|
|
779
780
|
import subprocess
|
|
780
781
|
|
|
@@ -798,7 +799,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
798
799
|
time.sleep(1)
|
|
799
800
|
else:
|
|
800
801
|
# Auto-detect app path if not supplied; prompt interactively from discovered ASGI apps
|
|
801
|
-
app_path = os.path.abspath(
|
|
802
|
+
app_path = os.path.abspath(app) if app else None
|
|
802
803
|
if not app_path or not os.path.isfile(app_path):
|
|
803
804
|
# First pass: look for known common filenames
|
|
804
805
|
candidates = [
|
|
@@ -828,13 +829,13 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
828
829
|
choice = 1
|
|
829
830
|
choice = max(1, min(choice, len(found)))
|
|
830
831
|
app_path = str(found[choice - 1].resolve())
|
|
831
|
-
if not app_path and
|
|
832
|
+
if not app_path and script:
|
|
832
833
|
# Legacy script fallback if user supplied --script explicitly
|
|
833
834
|
from synth_ai.demos.demo_task_apps.math.deploy_modal import deploy as modal_deploy
|
|
834
835
|
|
|
835
|
-
url = modal_deploy(script_path=
|
|
836
|
-
if
|
|
837
|
-
app_name =
|
|
836
|
+
url = modal_deploy(script_path=script, env_api_key=env.env_api_key)
|
|
837
|
+
if name:
|
|
838
|
+
app_name = name
|
|
838
839
|
else:
|
|
839
840
|
if not app_path:
|
|
840
841
|
entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
|
|
@@ -845,7 +846,7 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
845
846
|
raise FileNotFoundError(f"App file not found: {app_path}")
|
|
846
847
|
# Surface the app path before asking for the name
|
|
847
848
|
print(f"Using task app: {app_path}")
|
|
848
|
-
existing_name = (
|
|
849
|
+
existing_name = (name or env.task_app_name or "").strip()
|
|
849
850
|
if not existing_name:
|
|
850
851
|
existing_name = f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
|
|
851
852
|
suggested_name = existing_name
|
|
@@ -1128,7 +1129,7 @@ def _ensure_modal_installed() -> None:
|
|
|
1128
1129
|
print("\n You can deploy later after authenticating.\n")
|
|
1129
1130
|
|
|
1130
1131
|
|
|
1131
|
-
def cmd_init(args: argparse.Namespace) -> int:
|
|
1132
|
+
def init(template: str | None = None, dest: str | None = None, force: bool = False) -> int:
|
|
1132
1133
|
"""Materialise a demo task app template into the current directory."""
|
|
1133
1134
|
|
|
1134
1135
|
templates = list(list_demo_templates())
|
|
@@ -1137,37 +1138,44 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1137
1138
|
return 1
|
|
1138
1139
|
|
|
1139
1140
|
selected: DemoTemplate | None = None
|
|
1140
|
-
if
|
|
1141
|
-
selected = get_demo_template(
|
|
1141
|
+
if template:
|
|
1142
|
+
selected = get_demo_template(template)
|
|
1142
1143
|
if selected is None:
|
|
1143
1144
|
available = ", ".join(t.template_id for t in templates)
|
|
1144
|
-
print(f"Unknown template '{
|
|
1145
|
+
print(f"Unknown template '{template}'. Available: {available}")
|
|
1145
1146
|
return 1
|
|
1146
1147
|
else:
|
|
1147
|
-
|
|
1148
|
-
|
|
1149
|
-
print(
|
|
1150
|
-
|
|
1151
|
-
|
|
1152
|
-
|
|
1153
|
-
|
|
1154
|
-
|
|
1155
|
-
|
|
1156
|
-
|
|
1157
|
-
|
|
1158
|
-
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
|
|
1148
|
+
if force:
|
|
1149
|
+
selected = templates[0]
|
|
1150
|
+
print(
|
|
1151
|
+
f"Using default template: {selected.name} ({selected.template_id}) "
|
|
1152
|
+
f"(pass --template to choose another)"
|
|
1153
|
+
)
|
|
1154
|
+
else:
|
|
1155
|
+
print("Select a demo template:" + "\n")
|
|
1156
|
+
for idx, tpl in enumerate(templates, start=1):
|
|
1157
|
+
print(f" [{idx}] {tpl.name} ({tpl.template_id})")
|
|
1158
|
+
print(f" {tpl.description}")
|
|
1159
|
+
try:
|
|
1160
|
+
choice_raw = input(f"Enter choice [1-{len(templates)}] (default 1): ").strip() or "1"
|
|
1161
|
+
except Exception:
|
|
1162
|
+
choice_raw = "1"
|
|
1163
|
+
if not choice_raw.isdigit():
|
|
1164
|
+
print("Selection must be a number.")
|
|
1165
|
+
return 1
|
|
1166
|
+
choice_idx = int(choice_raw)
|
|
1167
|
+
if not 1 <= choice_idx <= len(templates):
|
|
1168
|
+
print("Selection out of range.")
|
|
1169
|
+
return 1
|
|
1170
|
+
selected = templates[choice_idx - 1]
|
|
1163
1171
|
|
|
1164
1172
|
assert selected is not None
|
|
1165
1173
|
|
|
1166
1174
|
default_subdir = selected.default_subdir or selected.template_id
|
|
1167
1175
|
|
|
1168
1176
|
# Check if default destination is already occupied and switch to local_demos/ if needed
|
|
1169
|
-
if
|
|
1170
|
-
default_dest = Path(
|
|
1177
|
+
if dest:
|
|
1178
|
+
default_dest = Path(dest).expanduser().resolve()
|
|
1171
1179
|
else:
|
|
1172
1180
|
primary_dest = Path.cwd() / default_subdir
|
|
1173
1181
|
if primary_dest.exists() and any(primary_dest.iterdir()):
|
|
@@ -1176,10 +1184,13 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1176
1184
|
else:
|
|
1177
1185
|
default_dest = primary_dest.resolve()
|
|
1178
1186
|
|
|
1179
|
-
|
|
1180
|
-
dest_input = input(f"Destination directory [{default_dest}]: ").strip()
|
|
1181
|
-
except Exception:
|
|
1187
|
+
if force:
|
|
1182
1188
|
dest_input = ""
|
|
1189
|
+
else:
|
|
1190
|
+
try:
|
|
1191
|
+
dest_input = input(f"Destination directory [{default_dest}]: ").strip()
|
|
1192
|
+
except Exception:
|
|
1193
|
+
dest_input = ""
|
|
1183
1194
|
destination = Path(dest_input).expanduser().resolve() if dest_input else default_dest
|
|
1184
1195
|
|
|
1185
1196
|
# Track whether we should skip individual file prompts (if we already cleared the directory)
|
|
@@ -1190,15 +1201,18 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1190
1201
|
print(f"Destination {destination} is a file. Provide a directory path.")
|
|
1191
1202
|
return 1
|
|
1192
1203
|
if any(destination.iterdir()):
|
|
1193
|
-
|
|
1194
|
-
response =
|
|
1195
|
-
|
|
1196
|
-
|
|
1197
|
-
|
|
1198
|
-
|
|
1199
|
-
|
|
1200
|
-
|
|
1201
|
-
|
|
1204
|
+
if force:
|
|
1205
|
+
response = "y"
|
|
1206
|
+
else:
|
|
1207
|
+
try:
|
|
1208
|
+
response = (
|
|
1209
|
+
input(f"Destination {destination} is not empty. Overwrite? [y/N]: ")
|
|
1210
|
+
.strip()
|
|
1211
|
+
.lower()
|
|
1212
|
+
)
|
|
1213
|
+
except (EOFError, KeyboardInterrupt):
|
|
1214
|
+
print("\nCancelled.")
|
|
1215
|
+
return 1
|
|
1202
1216
|
if response not in ("y", "yes"):
|
|
1203
1217
|
print("Cancelled. Choose another directory or delete the existing one.")
|
|
1204
1218
|
return 1
|
|
@@ -1236,15 +1250,18 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1236
1250
|
# Handle directory copying
|
|
1237
1251
|
if src_path.is_dir():
|
|
1238
1252
|
if dest_path.exists() and not directory_cleared:
|
|
1239
|
-
|
|
1240
|
-
response =
|
|
1241
|
-
|
|
1242
|
-
|
|
1243
|
-
|
|
1244
|
-
|
|
1245
|
-
|
|
1246
|
-
|
|
1247
|
-
|
|
1253
|
+
if force:
|
|
1254
|
+
response = "y"
|
|
1255
|
+
else:
|
|
1256
|
+
try:
|
|
1257
|
+
response = (
|
|
1258
|
+
input(f"Directory {dest_path.name} exists. Overwrite? [y/N]: ")
|
|
1259
|
+
.strip()
|
|
1260
|
+
.lower()
|
|
1261
|
+
)
|
|
1262
|
+
except (EOFError, KeyboardInterrupt):
|
|
1263
|
+
print("\nCancelled.")
|
|
1264
|
+
return 1
|
|
1248
1265
|
if response not in ("y", "yes"):
|
|
1249
1266
|
print(f"Skipping {dest_path.name}")
|
|
1250
1267
|
continue
|
|
@@ -1256,15 +1273,18 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1256
1273
|
# Handle file copying
|
|
1257
1274
|
dest_path.parent.mkdir(parents=True, exist_ok=True)
|
|
1258
1275
|
if dest_path.exists() and not directory_cleared:
|
|
1259
|
-
|
|
1260
|
-
response =
|
|
1261
|
-
|
|
1262
|
-
|
|
1263
|
-
|
|
1264
|
-
|
|
1265
|
-
|
|
1266
|
-
|
|
1267
|
-
|
|
1276
|
+
if force:
|
|
1277
|
+
response = "y"
|
|
1278
|
+
else:
|
|
1279
|
+
try:
|
|
1280
|
+
response = (
|
|
1281
|
+
input(f"File {dest_path.name} exists. Overwrite? [y/N]: ")
|
|
1282
|
+
.strip()
|
|
1283
|
+
.lower()
|
|
1284
|
+
)
|
|
1285
|
+
except (EOFError, KeyboardInterrupt):
|
|
1286
|
+
print("\nCancelled.")
|
|
1287
|
+
return 1
|
|
1268
1288
|
if response not in ("y", "yes"):
|
|
1269
1289
|
print(f"Skipping {dest_path.name}")
|
|
1270
1290
|
continue
|
|
@@ -1280,11 +1300,14 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1280
1300
|
env_path = destination / ".env"
|
|
1281
1301
|
should_write = True
|
|
1282
1302
|
if env_path.exists() and not directory_cleared:
|
|
1283
|
-
|
|
1284
|
-
response =
|
|
1285
|
-
|
|
1286
|
-
|
|
1287
|
-
|
|
1303
|
+
if force:
|
|
1304
|
+
response = "y"
|
|
1305
|
+
else:
|
|
1306
|
+
try:
|
|
1307
|
+
response = input("File .env exists. Overwrite? [y/N]: ").strip().lower()
|
|
1308
|
+
except (EOFError, KeyboardInterrupt):
|
|
1309
|
+
print("\nCancelled.")
|
|
1310
|
+
return 1
|
|
1288
1311
|
should_write = response in ("y", "yes")
|
|
1289
1312
|
if should_write:
|
|
1290
1313
|
_write_text(env_path, "\n".join(selected.env_lines) + "\n")
|
|
@@ -1296,13 +1319,16 @@ def cmd_init(args: argparse.Namespace) -> int:
|
|
|
1296
1319
|
cfg_dst = (destination / selected.config_destination).resolve()
|
|
1297
1320
|
should_copy = True
|
|
1298
1321
|
if cfg_dst.exists() and not directory_cleared:
|
|
1299
|
-
|
|
1300
|
-
response =
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
|
|
1304
|
-
|
|
1305
|
-
|
|
1322
|
+
if force:
|
|
1323
|
+
response = "y"
|
|
1324
|
+
else:
|
|
1325
|
+
try:
|
|
1326
|
+
response = (
|
|
1327
|
+
input(f"File {cfg_dst.name} exists. Overwrite? [y/N]: ").strip().lower()
|
|
1328
|
+
)
|
|
1329
|
+
except (EOFError, KeyboardInterrupt):
|
|
1330
|
+
print("\nCancelled.")
|
|
1331
|
+
return 1
|
|
1306
1332
|
should_copy = response in ("y", "yes")
|
|
1307
1333
|
if should_copy:
|
|
1308
1334
|
cfg_dst.parent.mkdir(parents=True, exist_ok=True)
|
|
@@ -1388,7 +1414,14 @@ def _write_text(path: str, content: str) -> None:
|
|
|
1388
1414
|
# Note: `prepare` command has been removed; configuration now prepares TOML
|
|
1389
1415
|
|
|
1390
1416
|
|
|
1391
|
-
def cmd_run(args: argparse.Namespace) -> int:
|
|
1417
|
+
def run(
|
|
1418
|
+
config: str | None = None,
|
|
1419
|
+
batch_size: int | None = None,
|
|
1420
|
+
group_size: int | None = None,
|
|
1421
|
+
model: str | None = None,
|
|
1422
|
+
timeout: int = 600,
|
|
1423
|
+
dry_run: bool = False,
|
|
1424
|
+
) -> int:
|
|
1392
1425
|
# Change to demo directory if stored
|
|
1393
1426
|
demo_dir = demo_core.load_demo_dir()
|
|
1394
1427
|
if demo_dir and os.path.isdir(demo_dir):
|
|
@@ -1429,7 +1462,7 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1429
1462
|
import tomllib
|
|
1430
1463
|
|
|
1431
1464
|
try:
|
|
1432
|
-
cfg_path = _select_or_create_config(
|
|
1465
|
+
cfg_path = _select_or_create_config(config, env)
|
|
1433
1466
|
except FileNotFoundError as exc:
|
|
1434
1467
|
print(exc)
|
|
1435
1468
|
return 1
|
|
@@ -1451,12 +1484,12 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1451
1484
|
# Optional: TRAINER_START_URL passthrough if already set in environment
|
|
1452
1485
|
run_env["TRAINER_START_URL"] = run_env.get("TRAINER_START_URL", "")
|
|
1453
1486
|
# Forward convenience knobs
|
|
1454
|
-
if
|
|
1455
|
-
run_env["RL_BATCH_SIZE"] = str(int(
|
|
1456
|
-
if
|
|
1457
|
-
run_env["RL_GROUP_SIZE"] = str(int(
|
|
1458
|
-
if
|
|
1459
|
-
run_env["RL_MODEL"] =
|
|
1487
|
+
if batch_size is not None:
|
|
1488
|
+
run_env["RL_BATCH_SIZE"] = str(int(batch_size))
|
|
1489
|
+
if group_size is not None:
|
|
1490
|
+
run_env["RL_GROUP_SIZE"] = str(int(group_size))
|
|
1491
|
+
if model:
|
|
1492
|
+
run_env["RL_MODEL"] = model
|
|
1460
1493
|
cmd = ["uv", "run", "python", launcher]
|
|
1461
1494
|
print(f"Launching monorepo clustered runner: {' '.join(cmd)}")
|
|
1462
1495
|
code = _popen_stream(cmd, env=run_env)
|
|
@@ -1484,11 +1517,11 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1484
1517
|
inline_cfg = tomllib.load(fh)
|
|
1485
1518
|
with open(cfg_path) as fh2:
|
|
1486
1519
|
toml_text = fh2.read()
|
|
1487
|
-
if
|
|
1488
|
-
inline_cfg.setdefault("training", {})["batch_size"] = int(
|
|
1489
|
-
if
|
|
1490
|
-
inline_cfg.setdefault("training", {})["group_size"] = int(
|
|
1491
|
-
model_name =
|
|
1520
|
+
if batch_size is not None:
|
|
1521
|
+
inline_cfg.setdefault("training", {})["batch_size"] = int(batch_size)
|
|
1522
|
+
if group_size is not None:
|
|
1523
|
+
inline_cfg.setdefault("training", {})["group_size"] = int(group_size)
|
|
1524
|
+
model_name = model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
|
|
1492
1525
|
api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
|
|
1493
1526
|
# Print backend and key preview before request for clearer diagnostics
|
|
1494
1527
|
try:
|
|
@@ -1678,79 +1711,8 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1678
1711
|
if name == "eval.reward_mean":
|
|
1679
1712
|
print(f"metric eval.reward_mean step={p.get('step')} value={p.get('value')}")
|
|
1680
1713
|
break
|
|
1681
|
-
if time.time() - start_t > (
|
|
1714
|
+
if time.time() - start_t > (timeout or 600):
|
|
1682
1715
|
print("Timeout waiting for terminal state.")
|
|
1683
1716
|
break
|
|
1684
1717
|
time.sleep(2)
|
|
1685
1718
|
return 0
|
|
1686
|
-
|
|
1687
|
-
|
|
1688
|
-
def main(argv: list[str] | None = None) -> int:
|
|
1689
|
-
p = argparse.ArgumentParser(prog="synth-ai")
|
|
1690
|
-
sub = p.add_subparsers(dest="cmd")
|
|
1691
|
-
|
|
1692
|
-
def _add_parser(
|
|
1693
|
-
names: list[str], *, configure: Callable[[argparse.ArgumentParser], None]
|
|
1694
|
-
) -> None:
|
|
1695
|
-
for name in names:
|
|
1696
|
-
parser = sub.add_parser(name)
|
|
1697
|
-
configure(parser)
|
|
1698
|
-
|
|
1699
|
-
_add_parser(
|
|
1700
|
-
["rl_demo.setup", "demo.setup"],
|
|
1701
|
-
configure=lambda parser: parser.set_defaults(func=cmd_setup),
|
|
1702
|
-
)
|
|
1703
|
-
|
|
1704
|
-
def _init_opts(parser):
|
|
1705
|
-
parser.add_argument("--template", type=str, default=None, help="Template id to instantiate")
|
|
1706
|
-
parser.add_argument(
|
|
1707
|
-
"--dest", type=str, default=None, help="Destination directory for files"
|
|
1708
|
-
)
|
|
1709
|
-
parser.set_defaults(func=cmd_init)
|
|
1710
|
-
|
|
1711
|
-
_add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
|
|
1712
|
-
|
|
1713
|
-
# (prepare command removed)
|
|
1714
|
-
|
|
1715
|
-
def _deploy_opts(parser):
|
|
1716
|
-
parser.add_argument(
|
|
1717
|
-
"--local", action="store_true", help="Run local FastAPI instead of Modal deploy"
|
|
1718
|
-
)
|
|
1719
|
-
parser.add_argument(
|
|
1720
|
-
"--app", type=str, default=None, help="Path to Modal app.py for uv run modal deploy"
|
|
1721
|
-
)
|
|
1722
|
-
parser.add_argument("--name", type=str, default=None, help="Modal app name")
|
|
1723
|
-
parser.add_argument(
|
|
1724
|
-
"--script", type=str, default=None, help="Path to deploy_task_app.sh (optional legacy)"
|
|
1725
|
-
)
|
|
1726
|
-
parser.set_defaults(func=cmd_deploy)
|
|
1727
|
-
|
|
1728
|
-
_add_parser(["rl_demo.deploy", "demo.deploy"], configure=_deploy_opts)
|
|
1729
|
-
|
|
1730
|
-
_add_parser(
|
|
1731
|
-
["rl_demo.configure", "demo.configure"],
|
|
1732
|
-
configure=lambda parser: parser.set_defaults(func=cmd_run),
|
|
1733
|
-
)
|
|
1734
|
-
|
|
1735
|
-
def _run_opts(parser):
|
|
1736
|
-
parser.add_argument(
|
|
1737
|
-
"--config", type=str, default=None, help="Path to TOML config (skip prompt)"
|
|
1738
|
-
)
|
|
1739
|
-
parser.add_argument("--batch-size", type=int, default=None)
|
|
1740
|
-
parser.add_argument("--group-size", type=int, default=None)
|
|
1741
|
-
parser.add_argument("--model", type=str, default=None)
|
|
1742
|
-
parser.add_argument("--timeout", type=int, default=600)
|
|
1743
|
-
parser.add_argument("--dry-run", action="store_true", help="Print request body and exit")
|
|
1744
|
-
parser.set_defaults(func=cmd_run)
|
|
1745
|
-
|
|
1746
|
-
_add_parser(["run", "rl_demo.run", "demo.run"], configure=_run_opts)
|
|
1747
|
-
|
|
1748
|
-
args = p.parse_args(argv)
|
|
1749
|
-
if not hasattr(args, "func"):
|
|
1750
|
-
p.print_help()
|
|
1751
|
-
return 1
|
|
1752
|
-
return int(args.func(args) or 0)
|
|
1753
|
-
|
|
1754
|
-
|
|
1755
|
-
if __name__ == "__main__":
|
|
1756
|
-
sys.exit(main())
|
|
@@ -190,6 +190,22 @@ class SynthCrafterObservationCallable(GetObservationCallable):
|
|
|
190
190
|
obs_dict["truncated"] = priv.truncated
|
|
191
191
|
if pub.error_info:
|
|
192
192
|
obs_dict["tool_error"] = pub.error_info
|
|
193
|
+
counts_payload = {}
|
|
194
|
+
try:
|
|
195
|
+
counts = getattr(priv, "achievements_current_values", {}) or {}
|
|
196
|
+
for k, v in counts.items():
|
|
197
|
+
try:
|
|
198
|
+
counts_payload[str(k)] = int(v)
|
|
199
|
+
except Exception:
|
|
200
|
+
try:
|
|
201
|
+
counts_payload[str(k)] = int(float(v))
|
|
202
|
+
except Exception:
|
|
203
|
+
continue
|
|
204
|
+
if counts_payload:
|
|
205
|
+
obs_dict["achievements_counts"] = counts_payload
|
|
206
|
+
except Exception:
|
|
207
|
+
# Best effort; omit counts if coercion fails
|
|
208
|
+
pass
|
|
193
209
|
|
|
194
210
|
# Derive a simple local semantic patch around the player for easy rendering
|
|
195
211
|
try:
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
from .client import JudgeClient, JudgeOptions, JudgeScoreResponse
|
|
2
|
+
from .types import Judgement, RewardJudgement, RewardMetadata, Track, TrackAggregate
|
|
3
|
+
|
|
4
|
+
__all__ = [
|
|
5
|
+
"JudgeClient",
|
|
6
|
+
"JudgeOptions",
|
|
7
|
+
"JudgeScoreResponse",
|
|
8
|
+
"Judgement",
|
|
9
|
+
"RewardJudgement",
|
|
10
|
+
"RewardMetadata",
|
|
11
|
+
"Track",
|
|
12
|
+
"TrackAggregate",
|
|
13
|
+
]
|
|
14
|
+
|
|
15
|
+
|
synth_ai/evals/client.py
ADDED
|
@@ -0,0 +1,85 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Experimental Judge API client.
|
|
4
|
+
|
|
5
|
+
This surface is experimental and subject to change without notice.
|
|
6
|
+
Set environment variable `SYNTH_SILENCE_EXPERIMENTAL=1` to silence warnings.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
import os
|
|
10
|
+
import warnings
|
|
11
|
+
from typing import Any, Literal, TypedDict
|
|
12
|
+
|
|
13
|
+
from synth_ai.http import AsyncHttpClient, HTTPError
|
|
14
|
+
from synth_ai.tracing_v3.serialization import normalize_for_json
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
Provider = Literal["groq", "gemini"]
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
class JudgeOptions(TypedDict, total=False):
|
|
21
|
+
event: bool
|
|
22
|
+
outcome: bool
|
|
23
|
+
rubric_id: str
|
|
24
|
+
rubric_overrides: dict[str, Any]
|
|
25
|
+
provider: Provider
|
|
26
|
+
model: str
|
|
27
|
+
max_concurrency: int
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
class JudgeScoreResponse(TypedDict, total=False):
|
|
31
|
+
status: str
|
|
32
|
+
event_rewards: list[dict[str, Any]]
|
|
33
|
+
outcome_reward: dict[str, Any]
|
|
34
|
+
details: dict[str, Any]
|
|
35
|
+
|
|
36
|
+
|
|
37
|
+
class JudgeClient:
|
|
38
|
+
def __init__(self, base_url: str, api_key: str, *, timeout: float = 60.0) -> None:
|
|
39
|
+
_silence = (os.getenv("SYNTH_SILENCE_EXPERIMENTAL") or "").strip().lower()
|
|
40
|
+
if _silence not in {"1", "true", "t", "yes", "y", "on"}:
|
|
41
|
+
warnings.warn(
|
|
42
|
+
"Experimental API: synth_ai.evals.JudgeClient is experimental and may change without notice.",
|
|
43
|
+
UserWarning,
|
|
44
|
+
stacklevel=2,
|
|
45
|
+
)
|
|
46
|
+
self._base = base_url.rstrip("/")
|
|
47
|
+
self._key = api_key
|
|
48
|
+
self._timeout = timeout
|
|
49
|
+
|
|
50
|
+
async def score(
|
|
51
|
+
self,
|
|
52
|
+
*,
|
|
53
|
+
trace: dict[str, Any] | Any,
|
|
54
|
+
policy_name: str,
|
|
55
|
+
task_app_id: str,
|
|
56
|
+
options: JudgeOptions,
|
|
57
|
+
task_app_base_url: str | None = None,
|
|
58
|
+
) -> JudgeScoreResponse:
|
|
59
|
+
body = {
|
|
60
|
+
"policy_name": policy_name,
|
|
61
|
+
"task_app": {"id": task_app_id, **({"base_url": task_app_base_url} if task_app_base_url else {})},
|
|
62
|
+
"trace": normalize_for_json(trace),
|
|
63
|
+
"options": options or {},
|
|
64
|
+
}
|
|
65
|
+
try:
|
|
66
|
+
async with AsyncHttpClient(self._base, self._key, timeout=self._timeout) as http:
|
|
67
|
+
js = await http.post_json("/api/judge/v1/score", json=body)
|
|
68
|
+
if not isinstance(js, dict):
|
|
69
|
+
raise ValueError("invalid_judge_response_shape")
|
|
70
|
+
return js # type: ignore[return-value]
|
|
71
|
+
except HTTPError as e: # map to friendlier exceptions
|
|
72
|
+
status = int(getattr(e, "status", 0) or 0)
|
|
73
|
+
if status in (400, 422):
|
|
74
|
+
raise ValueError(f"judge_validation_error: {e.detail}") from e
|
|
75
|
+
if status in (401, 403):
|
|
76
|
+
raise PermissionError(f"judge_auth_error: {e.detail}") from e
|
|
77
|
+
if status == 404:
|
|
78
|
+
raise FileNotFoundError(f"judge_route_not_found: {e.detail}") from e
|
|
79
|
+
if status == 429:
|
|
80
|
+
raise Exception("judge_rate_limited") from e # replace with RetryLater in future
|
|
81
|
+
if status >= 500:
|
|
82
|
+
raise Exception("judge_transient_error") from e # replace with TransientError in future
|
|
83
|
+
raise
|
|
84
|
+
|
|
85
|
+
|
synth_ai/evals/types.py
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import Literal, TypedDict
|
|
4
|
+
|
|
5
|
+
Track = Literal["process", "reasoning", "progress", "outcome"]
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class Judgement(TypedDict, total=False):
|
|
9
|
+
key: str
|
|
10
|
+
title: str
|
|
11
|
+
description: str
|
|
12
|
+
score: float
|
|
13
|
+
reason: str
|
|
14
|
+
confidence: float
|
|
15
|
+
scale: Literal["binary", "bounded", "count", "custom"]
|
|
16
|
+
source: dict
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
class RewardJudgement(TypedDict, total=False):
|
|
20
|
+
judgement: Judgement
|
|
21
|
+
scope: Literal["step", "event", "outcome"]
|
|
22
|
+
turn: int | None
|
|
23
|
+
episode_id: str | None
|
|
24
|
+
reward_value: float | None
|
|
25
|
+
links: dict
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class TrackAggregate(TypedDict, total=False):
|
|
29
|
+
mean: float
|
|
30
|
+
median: float
|
|
31
|
+
std: float
|
|
32
|
+
n: int
|
|
33
|
+
|
|
34
|
+
|
|
35
|
+
class RewardMetadata(TypedDict, total=False):
|
|
36
|
+
per_window: list[RewardJudgement]
|
|
37
|
+
aggregates: dict[Track, TrackAggregate]
|
|
38
|
+
overall: dict[str, float] # {"final_outcome_score": float}
|
|
39
|
+
rubric: dict # {"ids": {...}, "hash": "..."}
|
|
40
|
+
model_info: dict # {"model": "...", ...}
|
|
41
|
+
|
|
42
|
+
|
synth_ai/jobs/client.py
CHANGED
|
@@ -1,10 +1,22 @@
|
|
|
1
1
|
from __future__ import annotations
|
|
2
2
|
|
|
3
|
+
import importlib
|
|
3
4
|
from typing import Any
|
|
4
5
|
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
6
|
+
try:
|
|
7
|
+
normalize_model_identifier = importlib.import_module("synth_ai.api.models.supported").normalize_model_identifier
|
|
8
|
+
except Exception as exc: # pragma: no cover - critical dependency
|
|
9
|
+
raise RuntimeError("Unable to load supported model utilities") from exc
|
|
10
|
+
|
|
11
|
+
try:
|
|
12
|
+
AsyncHttpClient = importlib.import_module("synth_ai.http").AsyncHttpClient
|
|
13
|
+
except Exception as exc: # pragma: no cover - critical dependency
|
|
14
|
+
raise RuntimeError("Unable to load HTTP client") from exc
|
|
15
|
+
|
|
16
|
+
try:
|
|
17
|
+
prepare_sft_job_payload = importlib.import_module("synth_ai.learning.sft.config").prepare_sft_job_payload
|
|
18
|
+
except Exception as exc: # pragma: no cover - critical dependency
|
|
19
|
+
raise RuntimeError("Unable to load SFT configuration helpers") from exc
|
|
8
20
|
|
|
9
21
|
|
|
10
22
|
class FilesApi:
|