synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (157)
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1286 -170
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
  152. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  153. synth_ai/install_sqld.sh +0 -40
  154. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  155. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  156. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  157. {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
@@ -15,7 +15,15 @@ if "/opt" not in _sys.path:
15
15
  _sys.path.insert(0, "/opt")
16
16
 
17
17
  # Use environment-aware names to avoid collisions across dev/prod
18
- _env_flag = (_os.getenv("SYNTH_BACKEND_URL_OVERRIDE", "") or _os.getenv("ENVIRONMENT", "") or _os.getenv("APP_ENVIRONMENT", "")).strip().lower()
18
+ _env_flag = (
19
+ (
20
+ _os.getenv("SYNTH_BACKEND_URL_OVERRIDE", "")
21
+ or _os.getenv("ENVIRONMENT", "")
22
+ or _os.getenv("APP_ENVIRONMENT", "")
23
+ )
24
+ .strip()
25
+ .lower()
26
+ )
19
27
  _is_prod = _env_flag in ("prod", "production")
20
28
 
21
29
  # Secret name must be provided explicitly via TASK_APP_SECRET_NAME
@@ -85,7 +93,9 @@ image = (
85
93
  ]
86
94
  )
87
95
  # Bundle the crafter module into the image for imports at runtime (absolute path)
88
- .add_local_dir(str((_HERE.parent / "crafter_task_app_helpers").resolve()), "/opt/crafter_task_app_helpers")
96
+ .add_local_dir(
97
+ str((_HERE.parent / "crafter_task_app_helpers").resolve()), "/opt/crafter_task_app_helpers"
98
+ )
89
99
  # Bundle synth_ai package to import full environment implementation.
90
100
  # Resolve repo root robustly (examples/rl/task_app.py -> repo_root = examples/rl/../../..)
91
101
  .add_local_dir(str((_HERE.parent.parent.parent / "synth_ai").resolve()), "/opt/synth_ai")
@@ -102,7 +112,10 @@ OPENAI_REMOVE_FIELDS = (
102
112
  OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
103
113
  OPENAI_TOOL_CHOICE_FORCED = {"type": "function", "function": {"name": "interact"}}
104
114
 
105
- def prepare_inference_payload_for_model(model: str | None, payload: dict[str, Any]) -> dict[str, Any]:
115
+
116
+ def prepare_inference_payload_for_model(
117
+ model: str | None, payload: dict[str, Any]
118
+ ) -> dict[str, Any]:
106
119
  """Sanitize payload for OpenAI API.
107
120
 
108
121
  - Always strip Synth-specific fields not supported by OpenAI (e.g., stop_after_tool_calls).
@@ -132,7 +145,13 @@ def prepare_inference_payload_for_model(model: str | None, payload: dict[str, An
132
145
  out["parallel_tool_calls"] = False
133
146
  return out
134
147
 
135
- @app.function(image=image, secrets=[modal.Secret.from_name(MODAL_SECRET_NAME)], min_containers=1, max_containers=1)
148
+
149
+ @app.function(
150
+ image=image,
151
+ secrets=[modal.Secret.from_name(MODAL_SECRET_NAME)],
152
+ min_containers=1,
153
+ max_containers=1,
154
+ )
136
155
  @modal.asgi_app()
137
156
  def fastapi_app():
138
157
  # Import FastAPI/Pydantic inside the container runtime to avoid local import errors
@@ -144,6 +163,7 @@ def fastapi_app():
144
163
  import sys
145
164
  import os
146
165
  import httpx
166
+
147
167
  # Logger for debug output
148
168
  logger = logging.getLogger(__name__)
149
169
 
@@ -154,6 +174,7 @@ def fastapi_app():
154
174
  os.environ.setdefault("TURSO_LOCAL_DB_URL", "sqlite+aiosqlite:////tmp/synth_ai.db")
155
175
 
156
176
  import importlib
177
+
157
178
  preload_modules = [
158
179
  # synth_ai core
159
180
  "synth_ai",
@@ -254,10 +275,14 @@ def fastapi_app():
254
275
  def health(request: Request):
255
276
  env_key = os.environ.get("ENVIRONMENT_API_KEY")
256
277
  if not env_key:
257
- raise HTTPException(status_code=503, detail="Auth not configured: missing ENVIRONMENT_API_KEY in task service environment")
278
+ raise HTTPException(
279
+ status_code=503,
280
+ detail="Auth not configured: missing ENVIRONMENT_API_KEY in task service environment",
281
+ )
258
282
  # Authorize using all header variants; avoid typed Header to prevent 422s
259
283
  try:
260
284
  from synth_ai.task.auth import is_api_key_header_authorized
285
+
261
286
  authorized = is_api_key_header_authorized(request)
262
287
  except Exception:
263
288
  # Fallback: check only x-api-key
@@ -275,9 +300,13 @@ def fastapi_app():
275
300
  def health_rollout(request: Request):
276
301
  expected = os.environ.get("ENVIRONMENT_API_KEY")
277
302
  if not expected:
278
- raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Missing ENVIRONMENT_API_KEY in service env")
303
+ raise HTTPException(
304
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
305
+ detail="Missing ENVIRONMENT_API_KEY in service env",
306
+ )
279
307
  try:
280
308
  from synth_ai.task.auth import is_api_key_header_authorized
309
+
281
310
  authorized = is_api_key_header_authorized(request)
282
311
  except Exception:
283
312
  header_key = request.headers.get("x-api-key")
@@ -290,6 +319,7 @@ def fastapi_app():
290
319
 
291
320
  # Log and surface 422 validation errors with header presence
292
321
  from fastapi.exceptions import RequestValidationError
322
+
293
323
  @api.exception_handler(RequestValidationError)
294
324
  async def _on_validation_error(request: Request, exc: RequestValidationError):
295
325
  try:
@@ -304,7 +334,9 @@ def fastapi_app():
304
334
  print("[422] validation", snapshot, flush=True)
305
335
  except Exception:
306
336
  pass
307
- return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})
337
+ return JSONResponse(
338
+ status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
339
+ )
308
340
 
309
341
  @api.post(f"/env/{ENV_NAME}/initialize")
310
342
  async def initialize(req: InitRequest, request: Request):
@@ -337,14 +369,19 @@ def fastapi_app():
337
369
  def proxy_chat_completions(req: dict[str, Any]):
338
370
  openai_key = os.environ.get("OPENAI_API_KEY")
339
371
  if not openai_key:
340
- raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Missing OPENAI_API_KEY in task service environment")
372
+ raise HTTPException(
373
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
374
+ detail="Missing OPENAI_API_KEY in task service environment",
375
+ )
341
376
  # Sanitize payload for OpenAI models (e.g., gpt-5-*)
342
377
  model = req.get("model")
343
378
  payload = prepare_inference_payload_for_model(model, req)
344
379
  headers = {"Authorization": f"Bearer {openai_key}"}
345
380
  # Increase timeout for proxy calls (models may be slower)
346
381
  with httpx.Client(timeout=120.0) as client:
347
- resp = client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers)
382
+ resp = client.post(
383
+ "https://api.openai.com/v1/chat/completions", json=payload, headers=headers
384
+ )
348
385
  try:
349
386
  data = resp.json()
350
387
  except Exception:
@@ -371,7 +408,10 @@ def fastapi_app():
371
408
  expected = os.environ.get("ENVIRONMENT_API_KEY")
372
409
  if not expected:
373
410
  logger.error("rollout.auth.misconfigured: missing ENVIRONMENT_API_KEY")
374
- raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Auth not configured: missing ENVIRONMENT_API_KEY")
411
+ raise HTTPException(
412
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
413
+ detail="Auth not configured: missing ENVIRONMENT_API_KEY",
414
+ )
375
415
  # Compute masked diagnostics (never log full keys)
376
416
  try:
377
417
  exp_len = len(expected)
@@ -385,7 +425,11 @@ def fastapi_app():
385
425
  candidates = [c for c in [single, bearer, *multi] if c]
386
426
  # Assert server sees ALL keys sent by client
387
427
  if multi:
388
- logger.info("rollout.auth.candidates: n=%s first15=%s", len(candidates), [c[:15] for c in candidates])
428
+ logger.info(
429
+ "rollout.auth.candidates: n=%s first15=%s",
430
+ len(candidates),
431
+ [c[:15] for c in candidates],
432
+ )
389
433
  got_len = len(single or bearer or "")
390
434
  got_suf = (single or bearer or "")[-5:] if got_len >= 5 else ""
391
435
  except Exception:
@@ -398,13 +442,22 @@ def fastapi_app():
398
442
  if not authorized:
399
443
  logger.warning(
400
444
  "rollout.auth.failed: have_any=%s expect_len=%s expect_last5=%s got_len=%s got_last5=%s",
401
- bool(candidates), exp_len, exp_suf, got_len, got_suf,
445
+ bool(candidates),
446
+ exp_len,
447
+ exp_suf,
448
+ got_len,
449
+ got_suf,
450
+ )
451
+ raise HTTPException(
452
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key"
402
453
  )
403
- raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key")
404
454
  else:
405
455
  logger.info(
406
456
  "rollout.auth.ok: expect_len=%s expect_last5=%s got_len=%s got_last5=%s",
407
- exp_len, exp_suf, got_len, got_suf,
457
+ exp_len,
458
+ exp_suf,
459
+ got_len,
460
+ got_suf,
408
461
  )
409
462
 
410
463
  # Extract policy config
@@ -457,11 +510,15 @@ def fastapi_app():
457
510
  # Build mapping using crafter's internal ids
458
511
  import itertools as _it
459
512
  import crafter as _crafter
513
+
460
514
  dummy = None
461
515
  try:
462
516
  dummy = _crafter.Env()
463
517
  max_id = (
464
- max(max(dummy._world._mat_ids.values()), max(dummy._sem_view._obj_ids.values()))
518
+ max(
519
+ max(dummy._world._mat_ids.values()),
520
+ max(dummy._sem_view._obj_ids.values()),
521
+ )
465
522
  + 1
466
523
  )
467
524
  id_to_item = ["void"] * max_id
@@ -516,8 +573,14 @@ def fastapi_app():
516
573
  if ach:
517
574
  all_achievements = list(ach.keys())
518
575
  lines.append(f"achievements_available: {', '.join(all_achievements)}")
519
- lines.append(f"achievements_unlocked: {', '.join(ach_on)}" if ach_on else "achievements_unlocked: ")
520
- lines.append(f"achievements_progress: {len(ach_on)}/{len(all_achievements)}")
576
+ lines.append(
577
+ f"achievements_unlocked: {', '.join(ach_on)}"
578
+ if ach_on
579
+ else "achievements_unlocked: "
580
+ )
581
+ lines.append(
582
+ f"achievements_progress: {len(ach_on)}/{len(all_achievements)}"
583
+ )
521
584
  # Local surroundings (7x7) using semantic_map
522
585
  smap = obs.get("semantic_map")
523
586
  if smap is not None and pos is not None:
@@ -539,7 +602,11 @@ def fastapi_app():
539
602
  row.append("player")
540
603
  else:
541
604
  idx = int(smap[x, y])
542
- name = id_to_item[idx] if 0 <= idx < len(id_to_item) else str(idx)
605
+ name = (
606
+ id_to_item[idx]
607
+ if 0 <= idx < len(id_to_item)
608
+ else str(idx)
609
+ )
543
610
  row.append(name)
544
611
  matrix.append(row)
545
612
  # Transpose to match visual orientation
@@ -554,6 +621,7 @@ def fastapi_app():
554
621
  if not lines:
555
622
  lines.append("no salient state; explore to gather context")
556
623
  return "\n".join(lines)
624
+
557
625
  # Build compact context from last few tool calls (gpt-5-nano friendly)
558
626
  lines: list[str] = []
559
627
  for rec in reversed(trajectory_steps):
@@ -568,10 +636,14 @@ def fastapi_app():
568
636
  name = tc0.get("tool_name") or tc0.get("name") or "unknown"
569
637
  args = tc0.get("arguments")
570
638
  lines.append(f"- {name}: {args}")
571
- context_text = "Previous tool calls (most recent first):\n" + ("\n".join(lines) if lines else "- none")
639
+ context_text = "Previous tool calls (most recent first):\n" + (
640
+ "\n".join(lines) if lines else "- none"
641
+ )
572
642
  obs_text = _format_obs(observation)
573
643
  combined_text = f"Current observation:\n{obs_text}\n\n{context_text}"
574
- payload = policy.build_inference_request(combined_text, history=[], turn=len(trajectory_steps))
644
+ payload = policy.build_inference_request(
645
+ combined_text, history=[], turn=len(trajectory_steps)
646
+ )
575
647
  # Debug: print the full prompt content in a stable labeled block for grepability
576
648
  try:
577
649
  print("PROMPT_DUMP_BEGIN")
@@ -593,14 +665,20 @@ def fastapi_app():
593
665
  except Exception:
594
666
  pass
595
667
  try:
596
- _ach = observation.get("achievements_status") if isinstance(observation, dict) else {}
668
+ _ach = (
669
+ observation.get("achievements_status")
670
+ if isinstance(observation, dict)
671
+ else {}
672
+ )
597
673
  _ach_on = [k for k, v in (_ach or {}).items() if v]
598
674
  print(f"[task:crafter] achievements_unlocked: {_ach_on}", flush=True)
599
675
  except Exception:
600
676
  pass
677
+
601
678
  # Prepare payload based on model family (OpenAI vs vLLM)
602
679
  def _prepare_payload(p: dict, mdl: str | None) -> dict:
603
680
  return prepare_inference_payload_for_model(mdl, p)
681
+
604
682
  # Debug: payload shape
605
683
  print(
606
684
  "[task:crafter] inference payload: ",
@@ -619,7 +697,7 @@ def fastapi_app():
619
697
  _timeouts = httpx.Timeout(connect=10.0, read=180.0, write=60.0, pool=60.0)
620
698
  with httpx.Client(timeout=_timeouts) as client:
621
699
  # Decide endpoint: avoid calling our own /proxy inside the same request
622
- _direct = ("api.openai.com" in inference_url)
700
+ _direct = "api.openai.com" in inference_url
623
701
  if _direct:
624
702
  # Call OpenAI directly
625
703
  if _okey:
@@ -639,6 +717,7 @@ def fastapi_app():
639
717
  # Debug: outbound request diagnostics
640
718
  try:
641
719
  import json as _json
720
+
642
721
  _size = len(_json.dumps(to_send))
643
722
  except Exception:
644
723
  _size = -1
@@ -647,7 +726,12 @@ def fastapi_app():
647
726
  {
648
727
  "endpoint": f"{endpoint_base.rstrip('/')}/v1/chat/completions",
649
728
  "direct_openai": bool(_direct),
650
- "timeout": {"read": 180.0, "connect": 10.0, "write": 60.0, "pool": 60.0},
729
+ "timeout": {
730
+ "read": 180.0,
731
+ "connect": 10.0,
732
+ "write": 60.0,
733
+ "pool": 60.0,
734
+ },
651
735
  "payload_bytes": _size,
652
736
  "has_auth": bool(headers.get("Authorization")),
653
737
  },
@@ -663,14 +747,23 @@ def fastapi_app():
663
747
  )
664
748
  except httpx.ReadTimeout as rte:
665
749
  _elapsed = time.time() - _t0
666
- print(f"[task:crafter][timeout] read timeout after {_elapsed:.1f}s: {rte}", flush=True)
750
+ print(
751
+ f"[task:crafter][timeout] read timeout after {_elapsed:.1f}s: {rte}",
752
+ flush=True,
753
+ )
667
754
  raise
668
755
  except Exception as re:
669
756
  _elapsed = time.time() - _t0
670
- print(f"[task:crafter][error] request failed after {_elapsed:.1f}s: {type(re).__name__}: {re}", flush=True)
757
+ print(
758
+ f"[task:crafter][error] request failed after {_elapsed:.1f}s: {type(re).__name__}: {re}",
759
+ flush=True,
760
+ )
671
761
  raise
672
762
  _elapsed = time.time() - _t0
673
- print(f"[task:crafter] inference status= {resp.status_code} elapsed={_elapsed:.2f}s", flush=True)
763
+ print(
764
+ f"[task:crafter] inference status= {resp.status_code} elapsed={_elapsed:.2f}s",
765
+ flush=True,
766
+ )
674
767
  # Emit a light-weight perf snapshot for visibility
675
768
  try:
676
769
  print(
@@ -702,6 +795,7 @@ def fastapi_app():
702
795
  # Print full tool call payloads for inspection
703
796
  try:
704
797
  import json as _json
798
+
705
799
  for _i, _tc in enumerate(parsed):
706
800
  try:
707
801
  print(
@@ -717,8 +811,11 @@ def fastapi_app():
717
811
  # Dump compact body preview to understand schema when no tools parsed
718
812
  try:
719
813
  import json as _json
720
- preview = _json.dumps(data, separators=(",",":"))
721
- print("[task:crafter] body(no_tools) preview:", preview[:800], flush=True)
814
+
815
+ preview = _json.dumps(data, separators=(",", ":"))
816
+ print(
817
+ "[task:crafter] body(no_tools) preview:", preview[:800], flush=True
818
+ )
722
819
  except Exception:
723
820
  pass
724
821
  # Early terminate the episode to avoid hanging on empty tool calls
@@ -736,6 +833,7 @@ def fastapi_app():
736
833
  if name == "interact":
737
834
  # Parse the JSON arguments string
738
835
  import json
836
+
739
837
  args_str = tc.get("arguments", "{}")
740
838
  try:
741
839
  args_dict = json.loads(args_str)
@@ -743,7 +841,10 @@ def fastapi_app():
743
841
  reasoning = args_dict.get("reasoning", "")
744
842
  print(f"[task:crafter] reasoning: {reasoning}", flush=True)
745
843
  except (json.JSONDecodeError, TypeError):
746
- print(f"[task:crafter] ERROR: Failed to parse arguments: {args_str}", flush=True)
844
+ print(
845
+ f"[task:crafter] ERROR: Failed to parse arguments: {args_str}",
846
+ flush=True,
847
+ )
747
848
  actions = []
748
849
  reasoning = "Parse error"
749
850
 
@@ -751,12 +852,18 @@ def fastapi_app():
751
852
  # Print a compact echo of the current prompt + tool call for easier triage
752
853
  try:
753
854
  import json as _json
754
- print("TOOLCALL_CONFIG:", _json.dumps({
755
- "policy": req.policy.policy_name,
756
- "tools_present": True,
757
- "tool_choice": "required",
758
- "stop_after": 1,
759
- }))
855
+
856
+ print(
857
+ "TOOLCALL_CONFIG:",
858
+ _json.dumps(
859
+ {
860
+ "policy": req.policy.policy_name,
861
+ "tools_present": True,
862
+ "tool_choice": "required",
863
+ "stop_after": 1,
864
+ }
865
+ ),
866
+ )
760
867
  except Exception:
761
868
  pass
762
869
 
@@ -768,42 +875,77 @@ def fastapi_app():
768
875
  total_reward += float(reward)
769
876
  # Debug: print step outcome (compact)
770
877
  try:
771
- ok = list(observation.keys()) if isinstance(observation, dict) else []
772
- print(f"[task:crafter] step => a={act} r={float(reward)} done={bool(done)} obs_keys={ok[:5]}", flush=True)
878
+ ok = (
879
+ list(observation.keys())
880
+ if isinstance(observation, dict)
881
+ else []
882
+ )
883
+ print(
884
+ f"[task:crafter] step => a={act} r={float(reward)} done={bool(done)} obs_keys={ok[:5]}",
885
+ flush=True,
886
+ )
773
887
  except Exception:
774
888
  pass
775
- step = RolloutStep(obs=observation, tool_calls=pending_tool_calls, reward=float(reward), done=bool(done), truncated=False, info=info)
889
+ step = RolloutStep(
890
+ obs=observation,
891
+ tool_calls=pending_tool_calls,
892
+ reward=float(reward),
893
+ done=bool(done),
894
+ truncated=False,
895
+ info=info,
896
+ )
776
897
  trajectory_steps.append(step)
777
898
  ops_executed += 1
778
899
 
779
900
  # Check for achievement-based termination
780
901
  if isinstance(observation, dict):
781
- current_achievements = observation.get("achievements_status", {})
902
+ current_achievements = observation.get(
903
+ "achievements_status", {}
904
+ )
782
905
  # Track flips 0→1 within this decision
783
906
  try:
784
907
  if not isinstance(current_achievements, dict):
785
908
  current_achievements = {}
786
909
  if prev_ach is None:
787
- prev_ach = {k: bool(v) for k, v in (current_achievements or {}).items()}
910
+ prev_ach = {
911
+ k: bool(v)
912
+ for k, v in (current_achievements or {}).items()
913
+ }
788
914
  else:
789
915
  for name, on in (current_achievements or {}).items():
790
916
  if bool(on) and not bool(prev_ach.get(name, False)):
791
917
  decision_flips.add(str(name))
792
918
  # Update prev_ach to latest snapshot
793
- prev_ach = {k: bool(v) for k, v in (current_achievements or {}).items()}
919
+ prev_ach = {
920
+ k: bool(v)
921
+ for k, v in (current_achievements or {}).items()
922
+ }
794
923
  except Exception:
795
924
  pass
796
- achieved_count = sum(1 for v in current_achievements.values() if v)
925
+ achieved_count = sum(
926
+ 1 for v in current_achievements.values() if v
927
+ )
797
928
  total_achievements = len(current_achievements)
798
929
 
799
930
  # Terminate if we've achieved a significant portion of available achievements
800
- if total_achievements > 0 and achieved_count >= max(3, total_achievements // 2):
801
- print(f"[task:crafter] achievement_termination: {achieved_count}/{total_achievements} achievements reached", flush=True)
802
- print(f"[task:crafter] achieved: {[k for k, v in current_achievements.items() if v]}", flush=True)
931
+ if total_achievements > 0 and achieved_count >= max(
932
+ 3, total_achievements // 2
933
+ ):
934
+ print(
935
+ f"[task:crafter] achievement_termination: {achieved_count}/{total_achievements} achievements reached",
936
+ flush=True,
937
+ )
938
+ print(
939
+ f"[task:crafter] achieved: {[k for k, v in current_achievements.items() if v]}",
940
+ flush=True,
941
+ )
803
942
  break
804
943
 
805
944
  if done or len(trajectory_steps) >= max_steps:
806
- print(f"[task:crafter] episode_end: done={bool(done)} steps={len(trajectory_steps)} total_reward={total_reward}", flush=True)
945
+ print(
946
+ f"[task:crafter] episode_end: done={bool(done)} steps={len(trajectory_steps)} total_reward={total_reward}",
947
+ flush=True,
948
+ )
807
949
  break
808
950
  elif name == "terminate":
809
951
  # Handle termination
@@ -812,7 +954,14 @@ def fastapi_app():
812
954
  else:
813
955
  # Non-interact tool call: count as a step without env change
814
956
  print("[task:crafter] non-interact tool_call:", name, flush=True)
815
- step = RolloutStep(obs=observation, tool_calls=pending_tool_calls, reward=None, done=False, truncated=False, info=info)
957
+ step = RolloutStep(
958
+ obs=observation,
959
+ tool_calls=pending_tool_calls,
960
+ reward=None,
961
+ done=False,
962
+ truncated=False,
963
+ info=info,
964
+ )
816
965
  trajectory_steps.append(step)
817
966
  ops_executed += 1
818
967
  # End of decision: record indicator_i for shaping
@@ -823,7 +972,10 @@ def fastapi_app():
823
972
  pass
824
973
  pending_tool_calls = None
825
974
  if len(trajectory_steps) >= max_steps:
826
- print(f"[task:crafter] max_steps_reached: steps={len(trajectory_steps)} total_reward={total_reward}", flush=True)
975
+ print(
976
+ f"[task:crafter] max_steps_reached: steps={len(trajectory_steps)} total_reward={total_reward}",
977
+ flush=True,
978
+ )
827
979
  break
828
980
  else:
829
981
  # Unknown op: skip
@@ -865,7 +1017,11 @@ def fastapi_app():
865
1017
  # Step-reward shaping: compute decision-level rewards if enabled
866
1018
  branches: dict[str, Any] = {}
867
1019
  try:
868
- sr_cfg = (req.record.config or {}).get("step_rewards") if isinstance(req.record, RolloutRecordConfig) else None
1020
+ sr_cfg = (
1021
+ (req.record.config or {}).get("step_rewards")
1022
+ if isinstance(req.record, RolloutRecordConfig)
1023
+ else None
1024
+ )
869
1025
  except Exception:
870
1026
  sr_cfg = None
871
1027
  try:
@@ -880,6 +1036,7 @@ def fastapi_app():
880
1036
  indicator_lambda = float(sr_cfg.get("indicator_lambda", 0.0))
881
1037
  # Env overrides
882
1038
  import os as _os2
1039
+
883
1040
  if _os2.getenv("STEP_BETA"):
884
1041
  step_beta = float(_os2.getenv("STEP_BETA"))
885
1042
  if _os2.getenv("STEP_LAMBDA"):
@@ -909,15 +1066,19 @@ def fastapi_app():
909
1066
  # Optional tracing of episode/rewards (gated)
910
1067
  try:
911
1068
  import os as _os3
1069
+
912
1070
  if _os3.getenv("TRACE_RL", "0") == "1":
913
1071
  from synth_ai.tracing_v3.session_tracer import SessionTracer # type: ignore
1072
+
914
1073
  tracer = SessionTracer()
915
1074
  await tracer.initialize()
916
1075
  meta = {
917
1076
  "env": req.env.env_name,
918
1077
  "policy": req.policy.policy_name,
919
1078
  "step_rewards": {
920
- "enabled": bool(sr_cfg.get("enabled", False)) if isinstance(sr_cfg, dict) else False,
1079
+ "enabled": bool(sr_cfg.get("enabled", False))
1080
+ if isinstance(sr_cfg, dict)
1081
+ else False,
921
1082
  "mode": (sr_cfg.get("mode") if isinstance(sr_cfg, dict) else None),
922
1083
  },
923
1084
  }
@@ -938,7 +1099,10 @@ def fastapi_app():
938
1099
  num_episodes=1,
939
1100
  )
940
1101
  # Debug: print reward and achievement metrics
941
- print(f"[task:crafter] Rollout metrics: total_reward={total_reward}, total_achievements={total_achievements}, mean_return={metrics.mean_return}, episode_returns={metrics.episode_returns}", flush=True)
1102
+ print(
1103
+ f"[task:crafter] Rollout metrics: total_reward={total_reward}, total_achievements={total_achievements}, mean_return={metrics.mean_return}, episode_returns={metrics.episode_returns}",
1104
+ flush=True,
1105
+ )
942
1106
  return RolloutResponse(
943
1107
  run_id=req.run_id,
944
1108
  trajectories=[trajectory],
@@ -952,11 +1116,16 @@ def fastapi_app():
952
1116
  def test_auth(request: Request):
953
1117
  expected = os.environ.get("ENVIRONMENT_API_KEY")
954
1118
  if not expected:
955
- raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="Missing ENVIRONMENT_API_KEY in service env")
1119
+ raise HTTPException(
1120
+ status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
1121
+ detail="Missing ENVIRONMENT_API_KEY in service env",
1122
+ )
956
1123
  header_key = request.headers.get("x-api-key") or request.headers.get("X-API-Key")
957
1124
  ok = bool(header_key) and (header_key == expected)
958
1125
  if not ok:
959
- raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key")
1126
+ raise HTTPException(
1127
+ status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key"
1128
+ )
960
1129
  return {"ok": True}
961
1130
 
962
1131
  return api
@@ -229,7 +229,9 @@ def format_model_stats(stats: list[dict[str, Any]]) -> str:
229
229
  if not stats:
230
230
  return "No model usage recorded."
231
231
  lines = ["Model usage (by LLM calls):"]
232
- header = f"{'Model':30} {'Provider':10} {'Calls':>7} {'Tokens (in/out)':>20} {'Avg latency ms':>15}"
232
+ header = (
233
+ f"{'Model':30} {'Provider':10} {'Calls':>7} {'Tokens (in/out)':>20} {'Avg latency ms':>15}"
234
+ )
233
235
  lines.append(header)
234
236
  lines.append("-" * len(header))
235
237
  for item in stats:
@@ -243,9 +245,7 @@ def format_model_stats(stats: list[dict[str, Any]]) -> str:
243
245
  return "\n".join(lines)
244
246
 
245
247
 
246
- def format_achievement_summary(
247
- name_counts: Counter, size_counts: Counter
248
- ) -> str:
248
+ def format_achievement_summary(name_counts: Counter, size_counts: Counter) -> str:
249
249
  lines = ["Unique achievements unlocked:"]
250
250
  if name_counts:
251
251
  top = name_counts.most_common()
@@ -349,7 +349,9 @@ def format_model_achievement_stats(model_stats: dict[str, dict[str, Any]]) -> st
349
349
  return "Achievement stats by model:\n (no model sessions recorded)"
350
350
 
351
351
  lines = ["Achievement stats by model:"]
352
- for model_name in sorted(model_stats.keys(), key=lambda m: model_stats[m]["sessions"], reverse=True):
352
+ for model_name in sorted(
353
+ model_stats.keys(), key=lambda m: model_stats[m]["sessions"], reverse=True
354
+ ):
353
355
  stats = model_stats[model_name]
354
356
  providers = ", ".join(sorted(stats["providers"])) if stats["providers"] else "-"
355
357
  sessions = stats["sessions"]