synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/common_old/backend.py +0 -1
- examples/crafter_debug_render.py +15 -6
- examples/evals_old/compare_models.py +1 -0
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
- examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
- examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
- examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
- examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
- examples/finetuning_old/synth_qwen_v1/util.py +7 -2
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +17 -15
- examples/rl/run_rl_and_save.py +24 -7
- examples/rl/task_app/math_single_step.py +128 -11
- examples/rl/task_app/math_task_app.py +11 -3
- examples/rl_old/task_app.py +222 -53
- examples/warming_up_to_rl/analyze_trace_db.py +7 -5
- examples/warming_up_to_rl/export_trace_sft.py +141 -16
- examples/warming_up_to_rl/groq_test.py +11 -4
- examples/warming_up_to_rl/manage_secrets.py +15 -6
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +108 -30
- examples/warming_up_to_rl/run_fft_and_save.py +128 -52
- examples/warming_up_to_rl/run_local_rollout.py +87 -36
- examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
- examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
- examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
- examples/warming_up_to_rl/run_rl_and_save.py +31 -7
- examples/warming_up_to_rl/run_rollout_remote.py +37 -10
- examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
- synth_ai/__init__.py +1 -0
- synth_ai/api/train/builders.py +34 -10
- synth_ai/api/train/cli.py +172 -32
- synth_ai/api/train/config_finder.py +59 -4
- synth_ai/api/train/env_resolver.py +32 -14
- synth_ai/api/train/pollers.py +11 -3
- synth_ai/api/train/task_app.py +4 -1
- synth_ai/api/train/utils.py +20 -4
- synth_ai/cli/__init__.py +11 -4
- synth_ai/cli/balance.py +1 -1
- synth_ai/cli/demo.py +19 -5
- synth_ai/cli/rl_demo.py +75 -16
- synth_ai/cli/root.py +116 -37
- synth_ai/cli/task_apps.py +1276 -186
- synth_ai/cli/traces.py +1 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +67 -30
- synth_ai/demos/core/cli.py +493 -164
- synth_ai/demos/demo_task_apps/core.py +50 -6
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/reproducibility/tree.py +3 -1
- synth_ai/environments/service/core_routes.py +6 -2
- synth_ai/evals/base.py +0 -2
- synth_ai/experimental/synth_oss.py +11 -12
- synth_ai/handshake.py +3 -1
- synth_ai/http_client.py +31 -7
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +8 -4
- synth_ai/jobs/client.py +40 -10
- synth_ai/learning/client.py +33 -8
- synth_ai/learning/config.py +0 -2
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +6 -3
- synth_ai/learning/health.py +9 -2
- synth_ai/learning/jobs.py +17 -5
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
- synth_ai/learning/prompts/random_search.py +4 -1
- synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
- synth_ai/learning/rl_client.py +42 -14
- synth_ai/learning/sse.py +0 -2
- synth_ai/learning/validators.py +6 -2
- synth_ai/lm/caching/ephemeral.py +1 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +13 -1
- synth_ai/lm/core/synth_models.py +0 -1
- synth_ai/lm/core/vendor_clients.py +4 -2
- synth_ai/lm/overrides.py +2 -2
- synth_ai/lm/vendors/core/anthropic_api.py +7 -7
- synth_ai/lm/vendors/core/openai_api.py +2 -0
- synth_ai/lm/vendors/openai_standard.py +3 -1
- synth_ai/lm/vendors/openai_standard_responses.py +6 -3
- synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
- synth_ai/lm/vendors/synth_client.py +37 -10
- synth_ai/rl/__init__.py +0 -1
- synth_ai/rl/contracts.py +0 -2
- synth_ai/rl/env_keys.py +6 -1
- synth_ai/task/__init__.py +1 -0
- synth_ai/task/apps/__init__.py +11 -11
- synth_ai/task/auth.py +29 -17
- synth_ai/task/client.py +3 -1
- synth_ai/task/contracts.py +1 -0
- synth_ai/task/datasets.py +3 -1
- synth_ai/task/errors.py +3 -2
- synth_ai/task/health.py +0 -2
- synth_ai/task/json.py +0 -1
- synth_ai/task/proxy.py +2 -5
- synth_ai/task/rubrics.py +9 -3
- synth_ai/task/server.py +31 -5
- synth_ai/task/tracing_utils.py +8 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +0 -1
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +1 -0
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +2 -0
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +24 -3
- synth_ai/tracing_v3/storage/base.py +4 -1
- synth_ai/tracing_v3/storage/factory.py +0 -1
- synth_ai/tracing_v3/turso/manager.py +102 -38
- synth_ai/tracing_v3/turso/models.py +4 -1
- synth_ai/tracing_v3/utils.py +1 -0
- synth_ai/v0/tracing/upload.py +32 -135
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -154
- synth_ai/install_sqld.sh +0 -40
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
examples/rl/run_rl_and_save.py
CHANGED
|
@@ -24,10 +24,18 @@ def _load_toml(path: Path) -> Dict[str, Any]:
|
|
|
24
24
|
|
|
25
25
|
def main() -> None:
|
|
26
26
|
parser = argparse.ArgumentParser(description="Create math RL job via backend RL endpoint")
|
|
27
|
-
parser.add_argument(
|
|
27
|
+
parser.add_argument(
|
|
28
|
+
"--backend", default=os.getenv("BACKEND_BASE_URL", "http://localhost:8000/api")
|
|
29
|
+
)
|
|
28
30
|
parser.add_argument("--config", required=True, help="Path to RL TOML config")
|
|
29
|
-
parser.add_argument(
|
|
30
|
-
|
|
31
|
+
parser.add_argument(
|
|
32
|
+
"--task-url", default=os.getenv("TASK_APP_URL", ""), help="Override task service URL"
|
|
33
|
+
)
|
|
34
|
+
parser.add_argument(
|
|
35
|
+
"--idempotency",
|
|
36
|
+
default=os.getenv("RL_IDEMPOTENCY_KEY", ""),
|
|
37
|
+
help="Optional Idempotency-Key header",
|
|
38
|
+
)
|
|
31
39
|
args = parser.parse_args()
|
|
32
40
|
|
|
33
41
|
cfg_path = Path(args.config).expanduser()
|
|
@@ -35,16 +43,26 @@ def main() -> None:
|
|
|
35
43
|
|
|
36
44
|
services = cfg.get("services") if isinstance(cfg.get("services"), dict) else {}
|
|
37
45
|
|
|
38
|
-
task_url = (
|
|
46
|
+
task_url = (
|
|
47
|
+
(args.task_url or "").strip()
|
|
48
|
+
or (os.getenv("TASK_APP_URL") or "").strip()
|
|
49
|
+
or (services.get("task_url") or "").strip()
|
|
50
|
+
)
|
|
39
51
|
if not task_url:
|
|
40
|
-
print(
|
|
52
|
+
print(
|
|
53
|
+
"Missing task service URL. Provide --task-url or set TASK_APP_URL or services.task_url in TOML",
|
|
54
|
+
file=sys.stderr,
|
|
55
|
+
)
|
|
41
56
|
sys.exit(2)
|
|
42
57
|
|
|
43
58
|
model_cfg = cfg.get("model") if isinstance(cfg.get("model"), dict) else {}
|
|
44
59
|
has_source = bool((model_cfg.get("source") or "").strip())
|
|
45
60
|
has_base = bool((model_cfg.get("base") or "").strip())
|
|
46
61
|
if has_source == has_base:
|
|
47
|
-
print(
|
|
62
|
+
print(
|
|
63
|
+
"Model section must specify exactly one of [model].source or [model].base",
|
|
64
|
+
file=sys.stderr,
|
|
65
|
+
)
|
|
48
66
|
sys.exit(2)
|
|
49
67
|
|
|
50
68
|
payload: Dict[str, Any] = {
|
|
@@ -91,4 +109,3 @@ def main() -> None:
|
|
|
91
109
|
|
|
92
110
|
if __name__ == "__main__":
|
|
93
111
|
main()
|
|
94
|
-
|
|
@@ -40,7 +40,9 @@ from synth_ai.tracing_v3.session_tracer import SessionTracer
|
|
|
40
40
|
|
|
41
41
|
REPO_ROOT = Path(__file__).resolve().parents[3]
|
|
42
42
|
|
|
43
|
-
_modal_volume_candidate = Path(
|
|
43
|
+
_modal_volume_candidate = Path(
|
|
44
|
+
os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")
|
|
45
|
+
).expanduser()
|
|
44
46
|
_modal_volume_root: Optional[Path] = None
|
|
45
47
|
try:
|
|
46
48
|
_modal_volume_candidate.mkdir(parents=True, exist_ok=True)
|
|
@@ -55,7 +57,9 @@ if _modal_volume_root is not None:
|
|
|
55
57
|
local_dataset_dir.mkdir(parents=True, exist_ok=True)
|
|
56
58
|
os.environ.setdefault("MATH_DATASET_LOCAL_DIR", str(local_dataset_dir))
|
|
57
59
|
else:
|
|
58
|
-
hf_cache_path = Path(
|
|
60
|
+
hf_cache_path = Path(
|
|
61
|
+
os.getenv("MATH_DATASET_CACHE_DIR", str(REPO_ROOT / ".cache" / "hf-datasets"))
|
|
62
|
+
).expanduser()
|
|
59
63
|
|
|
60
64
|
hf_cache_path.mkdir(parents=True, exist_ok=True)
|
|
61
65
|
os.environ.setdefault("MATH_DATASET_CACHE_DIR", str(hf_cache_path))
|
|
@@ -203,7 +207,9 @@ class MathDataset:
|
|
|
203
207
|
if split not in self._cache:
|
|
204
208
|
local_file = self._local_file_for_split(split)
|
|
205
209
|
if local_file is not None:
|
|
206
|
-
dataset = load_dataset(
|
|
210
|
+
dataset = load_dataset(
|
|
211
|
+
"json", data_files=str(local_file), cache_dir=str(HF_DATASETS_CACHE)
|
|
212
|
+
)
|
|
207
213
|
self._cache[split] = dataset["train"]
|
|
208
214
|
else:
|
|
209
215
|
try:
|
|
@@ -301,9 +307,7 @@ class MathDataset:
|
|
|
301
307
|
except Exception as exc:
|
|
302
308
|
errors.append(f"{split}: {exc}")
|
|
303
309
|
if errors:
|
|
304
|
-
raise RuntimeError(
|
|
305
|
-
"Dataset preparation failed:\n" + "\n".join(errors)
|
|
306
|
-
)
|
|
310
|
+
raise RuntimeError("Dataset preparation failed:\n" + "\n".join(errors))
|
|
307
311
|
|
|
308
312
|
|
|
309
313
|
@dataclass
|
|
@@ -362,7 +366,9 @@ def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
|
|
|
362
366
|
}
|
|
363
367
|
|
|
364
368
|
|
|
365
|
-
def _score_submission(
|
|
369
|
+
def _score_submission(
|
|
370
|
+
state: MathEnvState, tool_calls: Sequence[Mapping[str, Any]]
|
|
371
|
+
) -> tuple[float, str, bool]:
|
|
366
372
|
if not tool_calls:
|
|
367
373
|
return REWARD_NEGATIVE_NO_TOOL, "missing_tool_call", False
|
|
368
374
|
call = tool_calls[0]
|
|
@@ -374,12 +380,59 @@ def _score_submission(state: MathEnvState, tool_calls: Sequence[Mapping[str, Any
|
|
|
374
380
|
if not answer:
|
|
375
381
|
return REWARD_NEGATIVE_NO_ANSWER, "blank_answer", False
|
|
376
382
|
is_correct = answer == state.answer
|
|
377
|
-
return (
|
|
383
|
+
return (
|
|
384
|
+
(REWARD_POSITIVE if is_correct else 0.0),
|
|
385
|
+
("correct" if is_correct else "incorrect"),
|
|
386
|
+
is_correct,
|
|
387
|
+
)
|
|
378
388
|
|
|
379
389
|
|
|
380
390
|
math_router = APIRouter()
|
|
381
391
|
|
|
382
392
|
|
|
393
|
+
def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[Dict[str, Any]]:
|
|
394
|
+
"""Return a compact, log-friendly preview of tool calls.
|
|
395
|
+
|
|
396
|
+
Truncates long fields to avoid noisy logs and leaking excessive content.
|
|
397
|
+
"""
|
|
398
|
+
preview: list[Dict[str, Any]] = []
|
|
399
|
+
for call in list(tool_calls or [])[:3]:
|
|
400
|
+
args = dict(call.get("args") or {})
|
|
401
|
+
answer = str(args.get("answer") or "")
|
|
402
|
+
# Hard truncate to keep logs compact
|
|
403
|
+
answer_short = answer[:120] + ("…" if len(answer) > 120 else "")
|
|
404
|
+
preview.append(
|
|
405
|
+
{
|
|
406
|
+
"tool": call.get("tool"),
|
|
407
|
+
"answer": answer_short,
|
|
408
|
+
}
|
|
409
|
+
)
|
|
410
|
+
return preview
|
|
411
|
+
|
|
412
|
+
|
|
413
|
+
def _event_and_outcome_components(
|
|
414
|
+
tool_calls: Sequence[Mapping[str, Any]], *, correct: bool, reward: float
|
|
415
|
+
) -> Dict[str, float]:
|
|
416
|
+
"""Approximate component-wise scores for RL-style logs.
|
|
417
|
+
|
|
418
|
+
- env: task-level scalar reward (our single-step outcome)
|
|
419
|
+
- rubric_event: 1.0 if a valid tool call with non-empty answer was made else 0.0
|
|
420
|
+
- rubric_outcome: 1.0 if final answer was correct else 0.0
|
|
421
|
+
"""
|
|
422
|
+
has_valid_tool = False
|
|
423
|
+
if tool_calls:
|
|
424
|
+
first = tool_calls[0] or {}
|
|
425
|
+
if str(first.get("tool") or "") == TOOL_NAME:
|
|
426
|
+
args = first.get("args") or {}
|
|
427
|
+
ans = str(args.get("answer") or "").strip()
|
|
428
|
+
has_valid_tool = bool(ans)
|
|
429
|
+
return {
|
|
430
|
+
"env": float(reward),
|
|
431
|
+
"rubric_event": 1.0 if has_valid_tool else 0.0,
|
|
432
|
+
"rubric_outcome": 1.0 if bool(correct) else 0.0,
|
|
433
|
+
}
|
|
434
|
+
|
|
435
|
+
|
|
383
436
|
@math_router.post("/env/math/initialize")
|
|
384
437
|
async def initialize_env(request: Request, payload: InitializePayload) -> Dict[str, Any]:
|
|
385
438
|
manager: MathEnvironmentManager = request.app.state.math_env_manager
|
|
@@ -410,6 +463,28 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
|
|
|
410
463
|
action = payload.get("action") or {}
|
|
411
464
|
tool_calls = action.get("tool_calls") or payload.get("tool_calls") or []
|
|
412
465
|
reward, status, correct = _score_submission(state, tool_calls)
|
|
466
|
+
try:
|
|
467
|
+
print(
|
|
468
|
+
"[MATH_STEP] env_id=",
|
|
469
|
+
state.env_id,
|
|
470
|
+
" split=",
|
|
471
|
+
state.split,
|
|
472
|
+
" index=",
|
|
473
|
+
state.index,
|
|
474
|
+
" calls=",
|
|
475
|
+
_preview_tool_calls(tool_calls),
|
|
476
|
+
" reward=",
|
|
477
|
+
reward,
|
|
478
|
+
" status=",
|
|
479
|
+
status,
|
|
480
|
+
" correct=",
|
|
481
|
+
correct,
|
|
482
|
+
" components=",
|
|
483
|
+
_event_and_outcome_components(tool_calls, correct=correct, reward=reward),
|
|
484
|
+
flush=True,
|
|
485
|
+
)
|
|
486
|
+
except Exception:
|
|
487
|
+
pass
|
|
413
488
|
state.done = True
|
|
414
489
|
|
|
415
490
|
observation = _observation_from_state(state)
|
|
@@ -448,7 +523,9 @@ def _resolve_inference_url(base_url: str) -> str:
|
|
|
448
523
|
return f"{normalized}/v1/chat/completions"
|
|
449
524
|
|
|
450
525
|
|
|
451
|
-
async def _call_inference(
|
|
526
|
+
async def _call_inference(
|
|
527
|
+
policy_config: Mapping[str, Any], observation: Mapping[str, Any]
|
|
528
|
+
) -> tuple[list[Dict[str, Any]], Dict[str, Any]]:
|
|
452
529
|
inference_url = str(policy_config.get("inference_url") or "").rstrip("/")
|
|
453
530
|
if not inference_url:
|
|
454
531
|
raise RuntimeError("policy.config.inference_url required for rollout")
|
|
@@ -562,6 +639,17 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
|
|
|
562
639
|
else:
|
|
563
640
|
parsed_args = {}
|
|
564
641
|
tool_calls.append({"tool": name, "args": parsed_args})
|
|
642
|
+
# Lightweight provider-side logging
|
|
643
|
+
try:
|
|
644
|
+
print(
|
|
645
|
+
"[MATH_INFER] model=",
|
|
646
|
+
model,
|
|
647
|
+
" calls=",
|
|
648
|
+
_preview_tool_calls(tool_calls),
|
|
649
|
+
flush=True,
|
|
650
|
+
)
|
|
651
|
+
except Exception:
|
|
652
|
+
pass
|
|
565
653
|
return tool_calls, data
|
|
566
654
|
|
|
567
655
|
|
|
@@ -580,7 +668,9 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
580
668
|
inference_payload: Dict[str, Any] | None = None
|
|
581
669
|
error_info: Dict[str, Any] = {}
|
|
582
670
|
try:
|
|
583
|
-
tool_calls, inference_payload = await _call_inference(
|
|
671
|
+
tool_calls, inference_payload = await _call_inference(
|
|
672
|
+
request.policy.config or {}, observation
|
|
673
|
+
)
|
|
584
674
|
except HTTPException as http_err:
|
|
585
675
|
tool_calls = []
|
|
586
676
|
error_info = {"error": http_err.detail, "code": http_err.status_code}
|
|
@@ -600,6 +690,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
600
690
|
tool_calls,
|
|
601
691
|
)
|
|
602
692
|
|
|
693
|
+
# Log a concise summary so we can debug reward=0 issues in production
|
|
694
|
+
try:
|
|
695
|
+
print(
|
|
696
|
+
"[MATH_ROLLOUT] run=",
|
|
697
|
+
request.run_id,
|
|
698
|
+
" split=",
|
|
699
|
+
sample["split"],
|
|
700
|
+
" index=",
|
|
701
|
+
sample["index"],
|
|
702
|
+
" calls=",
|
|
703
|
+
_preview_tool_calls(tool_calls),
|
|
704
|
+
" reward=",
|
|
705
|
+
reward,
|
|
706
|
+
" status=",
|
|
707
|
+
status,
|
|
708
|
+
" correct=",
|
|
709
|
+
correct,
|
|
710
|
+
" components=",
|
|
711
|
+
_event_and_outcome_components(tool_calls, correct=correct, reward=reward),
|
|
712
|
+
flush=True,
|
|
713
|
+
)
|
|
714
|
+
except Exception:
|
|
715
|
+
pass
|
|
716
|
+
|
|
603
717
|
step = RolloutStep(
|
|
604
718
|
obs=observation,
|
|
605
719
|
tool_calls=tool_calls,
|
|
@@ -610,6 +724,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
|
|
|
610
724
|
"status": status,
|
|
611
725
|
"correct": correct,
|
|
612
726
|
"raw_solution": sample["raw_solution"],
|
|
727
|
+
"tool_call_preview": _preview_tool_calls(tool_calls),
|
|
613
728
|
**error_info,
|
|
614
729
|
},
|
|
615
730
|
)
|
|
@@ -775,7 +890,9 @@ def build_config() -> TaskAppConfig:
|
|
|
775
890
|
|
|
776
891
|
tracing_enabled = tracing_env_enabled()
|
|
777
892
|
tracing_db_url = resolve_tracing_db_url()
|
|
778
|
-
tracer_factory = build_tracer_factory(
|
|
893
|
+
tracer_factory = build_tracer_factory(
|
|
894
|
+
SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
|
|
895
|
+
)
|
|
779
896
|
sft_output_dir = resolve_sft_output_dir()
|
|
780
897
|
|
|
781
898
|
app_state: Dict[str, Any] = {
|
|
@@ -40,7 +40,10 @@ def fastapi_app():
|
|
|
40
40
|
async def health(request: Request):
|
|
41
41
|
env_key = normalize_environment_api_key()
|
|
42
42
|
if not env_key:
|
|
43
|
-
return JSONResponse(
|
|
43
|
+
return JSONResponse(
|
|
44
|
+
status_code=503,
|
|
45
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
46
|
+
)
|
|
44
47
|
if not is_api_key_header_authorized(request):
|
|
45
48
|
prefix = _log_env_key_prefix("health", env_key)
|
|
46
49
|
content = {"status": "healthy", "authorized": False}
|
|
@@ -53,7 +56,10 @@ def fastapi_app():
|
|
|
53
56
|
async def health_rollout(request: Request):
|
|
54
57
|
env_key = normalize_environment_api_key()
|
|
55
58
|
if not env_key:
|
|
56
|
-
return JSONResponse(
|
|
59
|
+
return JSONResponse(
|
|
60
|
+
status_code=503,
|
|
61
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
62
|
+
)
|
|
57
63
|
if not is_api_key_header_authorized(request):
|
|
58
64
|
prefix = _log_env_key_prefix("health/rollout", env_key)
|
|
59
65
|
content = {"status": "healthy", "authorized": False}
|
|
@@ -76,7 +82,9 @@ def fastapi_app():
|
|
|
76
82
|
print("[422] validation", snapshot, flush=True)
|
|
77
83
|
except Exception:
|
|
78
84
|
pass
|
|
79
|
-
return JSONResponse(
|
|
85
|
+
return JSONResponse(
|
|
86
|
+
status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
|
|
87
|
+
)
|
|
80
88
|
|
|
81
89
|
return app
|
|
82
90
|
|