synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (157)
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1286 -170
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  152. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  153. synth_ai/install_sqld.sh +0 -40
  154. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  155. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  156. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  157. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
@@ -24,10 +24,18 @@ def _load_toml(path: Path) -> Dict[str, Any]:
24
24
 
25
25
  def main() -> None:
26
26
  parser = argparse.ArgumentParser(description="Create math RL job via backend RL endpoint")
27
- parser.add_argument("--backend", default=os.getenv("BACKEND_BASE_URL", "http://localhost:8000/api"))
27
+ parser.add_argument(
28
+ "--backend", default=os.getenv("BACKEND_BASE_URL", "http://localhost:8000/api")
29
+ )
28
30
  parser.add_argument("--config", required=True, help="Path to RL TOML config")
29
- parser.add_argument("--task-url", default=os.getenv("TASK_APP_URL", ""), help="Override task service URL")
30
- parser.add_argument("--idempotency", default=os.getenv("RL_IDEMPOTENCY_KEY", ""), help="Optional Idempotency-Key header")
31
+ parser.add_argument(
32
+ "--task-url", default=os.getenv("TASK_APP_URL", ""), help="Override task service URL"
33
+ )
34
+ parser.add_argument(
35
+ "--idempotency",
36
+ default=os.getenv("RL_IDEMPOTENCY_KEY", ""),
37
+ help="Optional Idempotency-Key header",
38
+ )
31
39
  args = parser.parse_args()
32
40
 
33
41
  cfg_path = Path(args.config).expanduser()
@@ -35,16 +43,26 @@ def main() -> None:
35
43
 
36
44
  services = cfg.get("services") if isinstance(cfg.get("services"), dict) else {}
37
45
 
38
- task_url = (args.task_url or "").strip() or (os.getenv("TASK_APP_URL") or "").strip() or (services.get("task_url") or "").strip()
46
+ task_url = (
47
+ (args.task_url or "").strip()
48
+ or (os.getenv("TASK_APP_URL") or "").strip()
49
+ or (services.get("task_url") or "").strip()
50
+ )
39
51
  if not task_url:
40
- print("Missing task service URL. Provide --task-url or set TASK_APP_URL or services.task_url in TOML", file=sys.stderr)
52
+ print(
53
+ "Missing task service URL. Provide --task-url or set TASK_APP_URL or services.task_url in TOML",
54
+ file=sys.stderr,
55
+ )
41
56
  sys.exit(2)
42
57
 
43
58
  model_cfg = cfg.get("model") if isinstance(cfg.get("model"), dict) else {}
44
59
  has_source = bool((model_cfg.get("source") or "").strip())
45
60
  has_base = bool((model_cfg.get("base") or "").strip())
46
61
  if has_source == has_base:
47
- print("Model section must specify exactly one of [model].source or [model].base", file=sys.stderr)
62
+ print(
63
+ "Model section must specify exactly one of [model].source or [model].base",
64
+ file=sys.stderr,
65
+ )
48
66
  sys.exit(2)
49
67
 
50
68
  payload: Dict[str, Any] = {
@@ -91,4 +109,3 @@ def main() -> None:
91
109
 
92
110
  if __name__ == "__main__":
93
111
  main()
94
-
@@ -40,7 +40,9 @@ from synth_ai.tracing_v3.session_tracer import SessionTracer
40
40
 
41
41
  REPO_ROOT = Path(__file__).resolve().parents[3]
42
42
 
43
- _modal_volume_candidate = Path(os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")).expanduser()
43
+ _modal_volume_candidate = Path(
44
+ os.getenv("MATH_MODAL_DATASET_DIR", "/modal_volumes/math_dataset")
45
+ ).expanduser()
44
46
  _modal_volume_root: Optional[Path] = None
45
47
  try:
46
48
  _modal_volume_candidate.mkdir(parents=True, exist_ok=True)
@@ -55,7 +57,9 @@ if _modal_volume_root is not None:
55
57
  local_dataset_dir.mkdir(parents=True, exist_ok=True)
56
58
  os.environ.setdefault("MATH_DATASET_LOCAL_DIR", str(local_dataset_dir))
57
59
  else:
58
- hf_cache_path = Path(os.getenv("MATH_DATASET_CACHE_DIR", str(REPO_ROOT / ".cache" / "hf-datasets")) ).expanduser()
60
+ hf_cache_path = Path(
61
+ os.getenv("MATH_DATASET_CACHE_DIR", str(REPO_ROOT / ".cache" / "hf-datasets"))
62
+ ).expanduser()
59
63
 
60
64
  hf_cache_path.mkdir(parents=True, exist_ok=True)
61
65
  os.environ.setdefault("MATH_DATASET_CACHE_DIR", str(hf_cache_path))
@@ -203,7 +207,9 @@ class MathDataset:
203
207
  if split not in self._cache:
204
208
  local_file = self._local_file_for_split(split)
205
209
  if local_file is not None:
206
- dataset = load_dataset("json", data_files=str(local_file), cache_dir=str(HF_DATASETS_CACHE))
210
+ dataset = load_dataset(
211
+ "json", data_files=str(local_file), cache_dir=str(HF_DATASETS_CACHE)
212
+ )
207
213
  self._cache[split] = dataset["train"]
208
214
  else:
209
215
  try:
@@ -301,9 +307,7 @@ class MathDataset:
301
307
  except Exception as exc:
302
308
  errors.append(f"{split}: {exc}")
303
309
  if errors:
304
- raise RuntimeError(
305
- "Dataset preparation failed:\n" + "\n".join(errors)
306
- )
310
+ raise RuntimeError("Dataset preparation failed:\n" + "\n".join(errors))
307
311
 
308
312
 
309
313
  @dataclass
@@ -362,7 +366,9 @@ def _observation_from_state(state: MathEnvState) -> Dict[str, Any]:
362
366
  }
363
367
 
364
368
 
365
- def _score_submission(state: MathEnvState, tool_calls: Sequence[Mapping[str, Any]]) -> tuple[float, str, bool]:
369
+ def _score_submission(
370
+ state: MathEnvState, tool_calls: Sequence[Mapping[str, Any]]
371
+ ) -> tuple[float, str, bool]:
366
372
  if not tool_calls:
367
373
  return REWARD_NEGATIVE_NO_TOOL, "missing_tool_call", False
368
374
  call = tool_calls[0]
@@ -374,12 +380,59 @@ def _score_submission(state: MathEnvState, tool_calls: Sequence[Mapping[str, Any
374
380
  if not answer:
375
381
  return REWARD_NEGATIVE_NO_ANSWER, "blank_answer", False
376
382
  is_correct = answer == state.answer
377
- return (REWARD_POSITIVE if is_correct else 0.0), ("correct" if is_correct else "incorrect"), is_correct
383
+ return (
384
+ (REWARD_POSITIVE if is_correct else 0.0),
385
+ ("correct" if is_correct else "incorrect"),
386
+ is_correct,
387
+ )
378
388
 
379
389
 
380
390
  math_router = APIRouter()
381
391
 
382
392
 
393
+ def _preview_tool_calls(tool_calls: Sequence[Mapping[str, Any]]) -> list[Dict[str, Any]]:
394
+ """Return a compact, log-friendly preview of tool calls.
395
+
396
+ Truncates long fields to avoid noisy logs and leaking excessive content.
397
+ """
398
+ preview: list[Dict[str, Any]] = []
399
+ for call in list(tool_calls or [])[:3]:
400
+ args = dict(call.get("args") or {})
401
+ answer = str(args.get("answer") or "")
402
+ # Hard truncate to keep logs compact
403
+ answer_short = answer[:120] + ("…" if len(answer) > 120 else "")
404
+ preview.append(
405
+ {
406
+ "tool": call.get("tool"),
407
+ "answer": answer_short,
408
+ }
409
+ )
410
+ return preview
411
+
412
+
413
+ def _event_and_outcome_components(
414
+ tool_calls: Sequence[Mapping[str, Any]], *, correct: bool, reward: float
415
+ ) -> Dict[str, float]:
416
+ """Approximate component-wise scores for RL-style logs.
417
+
418
+ - env: task-level scalar reward (our single-step outcome)
419
+ - rubric_event: 1.0 if a valid tool call with non-empty answer was made else 0.0
420
+ - rubric_outcome: 1.0 if final answer was correct else 0.0
421
+ """
422
+ has_valid_tool = False
423
+ if tool_calls:
424
+ first = tool_calls[0] or {}
425
+ if str(first.get("tool") or "") == TOOL_NAME:
426
+ args = first.get("args") or {}
427
+ ans = str(args.get("answer") or "").strip()
428
+ has_valid_tool = bool(ans)
429
+ return {
430
+ "env": float(reward),
431
+ "rubric_event": 1.0 if has_valid_tool else 0.0,
432
+ "rubric_outcome": 1.0 if bool(correct) else 0.0,
433
+ }
434
+
435
+
383
436
  @math_router.post("/env/math/initialize")
384
437
  async def initialize_env(request: Request, payload: InitializePayload) -> Dict[str, Any]:
385
438
  manager: MathEnvironmentManager = request.app.state.math_env_manager
@@ -410,6 +463,28 @@ async def step_env(request: Request, payload: Dict[str, Any]) -> Dict[str, Any]:
410
463
  action = payload.get("action") or {}
411
464
  tool_calls = action.get("tool_calls") or payload.get("tool_calls") or []
412
465
  reward, status, correct = _score_submission(state, tool_calls)
466
+ try:
467
+ print(
468
+ "[MATH_STEP] env_id=",
469
+ state.env_id,
470
+ " split=",
471
+ state.split,
472
+ " index=",
473
+ state.index,
474
+ " calls=",
475
+ _preview_tool_calls(tool_calls),
476
+ " reward=",
477
+ reward,
478
+ " status=",
479
+ status,
480
+ " correct=",
481
+ correct,
482
+ " components=",
483
+ _event_and_outcome_components(tool_calls, correct=correct, reward=reward),
484
+ flush=True,
485
+ )
486
+ except Exception:
487
+ pass
413
488
  state.done = True
414
489
 
415
490
  observation = _observation_from_state(state)
@@ -448,7 +523,9 @@ def _resolve_inference_url(base_url: str) -> str:
448
523
  return f"{normalized}/v1/chat/completions"
449
524
 
450
525
 
451
- async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping[str, Any]) -> tuple[list[Dict[str, Any]], Dict[str, Any]]:
526
+ async def _call_inference(
527
+ policy_config: Mapping[str, Any], observation: Mapping[str, Any]
528
+ ) -> tuple[list[Dict[str, Any]], Dict[str, Any]]:
452
529
  inference_url = str(policy_config.get("inference_url") or "").rstrip("/")
453
530
  if not inference_url:
454
531
  raise RuntimeError("policy.config.inference_url required for rollout")
@@ -562,6 +639,17 @@ async def _call_inference(policy_config: Mapping[str, Any], observation: Mapping
562
639
  else:
563
640
  parsed_args = {}
564
641
  tool_calls.append({"tool": name, "args": parsed_args})
642
+ # Lightweight provider-side logging
643
+ try:
644
+ print(
645
+ "[MATH_INFER] model=",
646
+ model,
647
+ " calls=",
648
+ _preview_tool_calls(tool_calls),
649
+ flush=True,
650
+ )
651
+ except Exception:
652
+ pass
565
653
  return tool_calls, data
566
654
 
567
655
 
@@ -580,7 +668,9 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
580
668
  inference_payload: Dict[str, Any] | None = None
581
669
  error_info: Dict[str, Any] = {}
582
670
  try:
583
- tool_calls, inference_payload = await _call_inference(request.policy.config or {}, observation)
671
+ tool_calls, inference_payload = await _call_inference(
672
+ request.policy.config or {}, observation
673
+ )
584
674
  except HTTPException as http_err:
585
675
  tool_calls = []
586
676
  error_info = {"error": http_err.detail, "code": http_err.status_code}
@@ -600,6 +690,30 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
600
690
  tool_calls,
601
691
  )
602
692
 
693
+ # Log a concise summary so we can debug reward=0 issues in production
694
+ try:
695
+ print(
696
+ "[MATH_ROLLOUT] run=",
697
+ request.run_id,
698
+ " split=",
699
+ sample["split"],
700
+ " index=",
701
+ sample["index"],
702
+ " calls=",
703
+ _preview_tool_calls(tool_calls),
704
+ " reward=",
705
+ reward,
706
+ " status=",
707
+ status,
708
+ " correct=",
709
+ correct,
710
+ " components=",
711
+ _event_and_outcome_components(tool_calls, correct=correct, reward=reward),
712
+ flush=True,
713
+ )
714
+ except Exception:
715
+ pass
716
+
603
717
  step = RolloutStep(
604
718
  obs=observation,
605
719
  tool_calls=tool_calls,
@@ -610,6 +724,7 @@ async def rollout_executor(request: RolloutRequest, fastapi_request: Request) ->
610
724
  "status": status,
611
725
  "correct": correct,
612
726
  "raw_solution": sample["raw_solution"],
727
+ "tool_call_preview": _preview_tool_calls(tool_calls),
613
728
  **error_info,
614
729
  },
615
730
  )
@@ -775,7 +890,9 @@ def build_config() -> TaskAppConfig:
775
890
 
776
891
  tracing_enabled = tracing_env_enabled()
777
892
  tracing_db_url = resolve_tracing_db_url()
778
- tracer_factory = build_tracer_factory(SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url)
893
+ tracer_factory = build_tracer_factory(
894
+ SessionTracer, enabled=tracing_enabled, db_url=tracing_db_url
895
+ )
779
896
  sft_output_dir = resolve_sft_output_dir()
780
897
 
781
898
  app_state: Dict[str, Any] = {
@@ -40,7 +40,10 @@ def fastapi_app():
40
40
  async def health(request: Request):
41
41
  env_key = normalize_environment_api_key()
42
42
  if not env_key:
43
- return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
43
+ return JSONResponse(
44
+ status_code=503,
45
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
46
+ )
44
47
  if not is_api_key_header_authorized(request):
45
48
  prefix = _log_env_key_prefix("health", env_key)
46
49
  content = {"status": "healthy", "authorized": False}
@@ -53,7 +56,10 @@ def fastapi_app():
53
56
  async def health_rollout(request: Request):
54
57
  env_key = normalize_environment_api_key()
55
58
  if not env_key:
56
- return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
59
+ return JSONResponse(
60
+ status_code=503,
61
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
62
+ )
57
63
  if not is_api_key_header_authorized(request):
58
64
  prefix = _log_env_key_prefix("health/rollout", env_key)
59
65
  content = {"status": "healthy", "authorized": False}
@@ -76,7 +82,9 @@ def fastapi_app():
76
82
  print("[422] validation", snapshot, flush=True)
77
83
  except Exception:
78
84
  pass
79
- return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})
85
+ return JSONResponse(
86
+ status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
87
+ )
80
88
 
81
89
  return app
82
90