synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic.

Files changed (155)
  1. examples/common_old/backend.py +0 -1
  2. examples/crafter_debug_render.py +15 -6
  3. examples/evals_old/compare_models.py +1 -0
  4. examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
  5. examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
  6. examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
  7. examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
  8. examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
  9. examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
  10. examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
  11. examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
  12. examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
  13. examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
  14. examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
  15. examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
  16. examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
  17. examples/finetuning_old/synth_qwen_v1/util.py +7 -2
  18. examples/rl/configs/eval_base_qwen.toml +1 -1
  19. examples/rl/configs/rl_from_base_qwen17.toml +1 -1
  20. examples/rl/download_dataset.py +26 -10
  21. examples/rl/run_eval.py +17 -15
  22. examples/rl/run_rl_and_save.py +24 -7
  23. examples/rl/task_app/math_single_step.py +128 -11
  24. examples/rl/task_app/math_task_app.py +11 -3
  25. examples/rl_old/task_app.py +222 -53
  26. examples/warming_up_to_rl/analyze_trace_db.py +7 -5
  27. examples/warming_up_to_rl/export_trace_sft.py +141 -16
  28. examples/warming_up_to_rl/groq_test.py +11 -4
  29. examples/warming_up_to_rl/manage_secrets.py +15 -6
  30. examples/warming_up_to_rl/readme.md +9 -2
  31. examples/warming_up_to_rl/run_eval.py +108 -30
  32. examples/warming_up_to_rl/run_fft_and_save.py +128 -52
  33. examples/warming_up_to_rl/run_local_rollout.py +87 -36
  34. examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
  35. examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
  36. examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
  37. examples/warming_up_to_rl/run_rl_and_save.py +31 -7
  38. examples/warming_up_to_rl/run_rollout_remote.py +37 -10
  39. examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
  40. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
  41. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
  42. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
  43. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
  44. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
  45. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
  46. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
  47. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
  48. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
  49. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
  50. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
  51. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
  52. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
  53. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
  54. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
  55. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
  56. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
  57. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
  58. synth_ai/__init__.py +1 -0
  59. synth_ai/api/train/builders.py +34 -10
  60. synth_ai/api/train/cli.py +172 -32
  61. synth_ai/api/train/config_finder.py +59 -4
  62. synth_ai/api/train/env_resolver.py +32 -14
  63. synth_ai/api/train/pollers.py +11 -3
  64. synth_ai/api/train/task_app.py +4 -1
  65. synth_ai/api/train/utils.py +20 -4
  66. synth_ai/cli/__init__.py +11 -4
  67. synth_ai/cli/balance.py +1 -1
  68. synth_ai/cli/demo.py +19 -5
  69. synth_ai/cli/rl_demo.py +75 -16
  70. synth_ai/cli/root.py +116 -37
  71. synth_ai/cli/task_apps.py +1276 -186
  72. synth_ai/cli/traces.py +1 -0
  73. synth_ai/cli/turso.py +73 -0
  74. synth_ai/core/experiment.py +0 -2
  75. synth_ai/demo_registry.py +67 -30
  76. synth_ai/demos/core/cli.py +493 -164
  77. synth_ai/demos/demo_task_apps/core.py +50 -6
  78. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
  79. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
  80. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  81. synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
  82. synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
  83. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
  84. synth_ai/environments/examples/bandit/engine.py +12 -4
  85. synth_ai/environments/examples/bandit/taskset.py +4 -4
  86. synth_ai/environments/reproducibility/tree.py +3 -1
  87. synth_ai/environments/service/core_routes.py +6 -2
  88. synth_ai/evals/base.py +0 -2
  89. synth_ai/experimental/synth_oss.py +11 -12
  90. synth_ai/handshake.py +3 -1
  91. synth_ai/http_client.py +31 -7
  92. synth_ai/inference/__init__.py +0 -2
  93. synth_ai/inference/client.py +8 -4
  94. synth_ai/jobs/client.py +40 -10
  95. synth_ai/learning/client.py +33 -8
  96. synth_ai/learning/config.py +0 -2
  97. synth_ai/learning/constants.py +0 -2
  98. synth_ai/learning/ft_client.py +6 -3
  99. synth_ai/learning/health.py +9 -2
  100. synth_ai/learning/jobs.py +17 -5
  101. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
  102. synth_ai/learning/prompts/random_search.py +4 -1
  103. synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
  104. synth_ai/learning/rl_client.py +42 -14
  105. synth_ai/learning/sse.py +0 -2
  106. synth_ai/learning/validators.py +6 -2
  107. synth_ai/lm/caching/ephemeral.py +1 -3
  108. synth_ai/lm/core/exceptions.py +0 -2
  109. synth_ai/lm/core/main.py +13 -1
  110. synth_ai/lm/core/synth_models.py +0 -1
  111. synth_ai/lm/core/vendor_clients.py +4 -2
  112. synth_ai/lm/overrides.py +2 -2
  113. synth_ai/lm/vendors/core/anthropic_api.py +7 -7
  114. synth_ai/lm/vendors/core/openai_api.py +2 -0
  115. synth_ai/lm/vendors/openai_standard.py +3 -1
  116. synth_ai/lm/vendors/openai_standard_responses.py +6 -3
  117. synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
  118. synth_ai/lm/vendors/synth_client.py +37 -10
  119. synth_ai/rl/__init__.py +0 -1
  120. synth_ai/rl/contracts.py +0 -2
  121. synth_ai/rl/env_keys.py +6 -1
  122. synth_ai/task/__init__.py +1 -0
  123. synth_ai/task/apps/__init__.py +11 -11
  124. synth_ai/task/auth.py +29 -17
  125. synth_ai/task/client.py +3 -1
  126. synth_ai/task/contracts.py +1 -0
  127. synth_ai/task/datasets.py +3 -1
  128. synth_ai/task/errors.py +3 -2
  129. synth_ai/task/health.py +0 -2
  130. synth_ai/task/json.py +0 -1
  131. synth_ai/task/proxy.py +2 -5
  132. synth_ai/task/rubrics.py +9 -3
  133. synth_ai/task/server.py +31 -5
  134. synth_ai/task/tracing_utils.py +8 -3
  135. synth_ai/task/validators.py +0 -1
  136. synth_ai/task/vendors.py +0 -1
  137. synth_ai/tracing_v3/db_config.py +26 -1
  138. synth_ai/tracing_v3/decorators.py +1 -0
  139. synth_ai/tracing_v3/examples/basic_usage.py +3 -2
  140. synth_ai/tracing_v3/hooks.py +2 -0
  141. synth_ai/tracing_v3/replica_sync.py +1 -0
  142. synth_ai/tracing_v3/session_tracer.py +24 -3
  143. synth_ai/tracing_v3/storage/base.py +4 -1
  144. synth_ai/tracing_v3/storage/factory.py +0 -1
  145. synth_ai/tracing_v3/turso/manager.py +102 -38
  146. synth_ai/tracing_v3/turso/models.py +4 -1
  147. synth_ai/tracing_v3/utils.py +1 -0
  148. synth_ai/v0/tracing/upload.py +32 -135
  149. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
  150. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -154
  151. synth_ai/install_sqld.sh +0 -40
  152. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
  153. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
  154. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
  155. {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
@@ -24,6 +24,7 @@ class DemoEnv:
     task_app_name: str = ""
     task_app_secret_name: str = DEFAULT_TASK_APP_SECRET_NAME
 
+
 def _mask(value: str, keep: int = 4) -> str:
     if not value:
         return ""
@@ -121,6 +122,32 @@ def persist_env_api_key(key: str) -> None:
     _write_state(data)
 
 
+def persist_demo_dir(demo_dir: str) -> None:
+    """Store the demo directory path for subsequent commands."""
+    data = _read_state()
+    data["DEMO_DIR"] = demo_dir
+    _write_state(data)
+
+
+def load_demo_dir() -> str | None:
+    """Load the stored demo directory path, if any."""
+    data = _read_state()
+    return data.get("DEMO_DIR")
+
+
+def persist_env_file_path(env_path: str) -> None:
+    """Store the .env file path for subsequent commands."""
+    data = _read_state()
+    data["ENV_FILE_PATH"] = env_path
+    _write_state(data)
+
+
+def load_env_file_path() -> str | None:
+    """Load the stored .env file path, if any."""
+    data = _read_state()
+    return data.get("ENV_FILE_PATH")
+
+
 def modal_auth_status() -> Tuple[bool, str]:
     """Return (ok, message) describing Modal CLI credential status."""
 
@@ -192,7 +219,9 @@ def load_env() -> DemoEnv:
     # Repo/package .envs (fallbacks)
     repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
     repo_env = load_dotenv_file(os.path.join(repo_root, ".env"))
-    pkg_env = load_dotenv_file(os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env"))
+    pkg_env = load_dotenv_file(
+        os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env")
+    )
     examples_env = load_dotenv_file(os.path.join(repo_root, "examples", "rl", ".env"))
 
     state = _read_state()
@@ -241,7 +270,11 @@ def load_env() -> DemoEnv:
         or str(state.get("SYNTH_API_KEY") or "")
     )
     if not synth_api_key:
-        mode = "prod" if default_root in dev_url else ("local" if ("localhost" in dev_url or "127.0.0.1" in dev_url) else "dev")
+        mode = (
+            "prod"
+            if default_root in dev_url
+            else ("local" if ("localhost" in dev_url or "127.0.0.1" in dev_url) else "dev")
+        )
         if mode == "prod":
             synth_api_key = (
                 os_env.get("PROD_SYNTH_API_KEY")
@@ -310,7 +343,9 @@ def load_env() -> DemoEnv:
     return env
 
 
-def assert_http_ok(url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0) -> bool:
+def assert_http_ok(
+    url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0
+) -> bool:
     try:
         import ssl
 
@@ -387,7 +422,14 @@ def persist_api_key(key: str) -> None:
     _write_state(data)
 
 
-def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] = None, group_size: Optional[int] = None, model: Optional[str] = None) -> None:
+def run_job(
+    env: DemoEnv,
+    config_toml_path: str,
+    *,
+    batch_size: Optional[int] = None,
+    group_size: Optional[int] = None,
+    model: Optional[str] = None,
+) -> None:
     """Create and stream a short RL job using the backend API (placeholder: prints cURL to execute)."""
     backend = env.dev_backend_url.rstrip("/")
@@ -396,9 +438,11 @@ def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] =
     api_base = backend + "/api"
     print("\nTo create an RL job, run:")
     print(
-        "curl -s -X POST \"" + api_base + "/rl/jobs\" "
+        'curl -s -X POST "' + api_base + '/rl/jobs" '
         "-H 'Content-Type: application/json' "
         f"-H 'Authorization: Bearer {env.synth_api_key}' "
         "-d '{"  # intentionally not fully formed here for brevity in this scaffold
     )
-    print(" NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events.")
+    print(
+        " NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events."
+    )
@@ -8,8 +8,7 @@ variety = "fft"
 
 [job]
 model = "Qwen/Qwen3-4B"
-# Limit training to the first 100 conversations (export a 100-row JSONL and point to it here)
-# data = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.head100.jsonl"
+data = "ft_data/crafter_traces.jsonl"
 
 [compute]
 # Adjust as needed for your quota
@@ -23,7 +22,7 @@ topology = {}
 
 # Optional local validation dataset path (JSONL). If set, the client will upload
 # this file and wire up validation so the frontend can display val.loss.
-validation_path = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.tokens_1000000_seed_123.val_2000.jsonl"
+# validation_path = "../ft_data/crafter_validation.jsonl"
 
 [training]
 mode = "sft_offline"
@@ -1,10 +1,9 @@
-
 """Compatibility wrapper for the GRPO Crafter task app.
 
-This module now delegates to the shared TaskAppConfig defined in
-`synth_ai.task.apps.grpo_crafter`. It is kept for legacy usage (running the
-file directly or targeting `fastapi_app` from external tooling). Prefer using
-`uvx synth-ai serve grpo-crafter` for local development and testing.
+This module now delegates to the TaskAppConfig defined in the local example at
+`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
+(running the file directly or targeting `fastapi_app` from external tooling).
+Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
 """
 
 from __future__ import annotations
@@ -17,35 +16,43 @@ from fastapi.responses import JSONResponse
 from starlette.requests import Request
 
 from synth_ai.task.apps import ModalDeploymentConfig, registry
-from synth_ai.task.apps.grpo_crafter import build_config
 from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
 from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
+import importlib.util
+
+
+def _load_build_config():
+    # Find synth_ai package location to locate examples/
+    import synth_ai
+
+    synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
+    module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
+
+    if not module_path.exists():
+        raise ImportError(
+            f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
+        )
+
+    spec = importlib.util.spec_from_file_location(
+        "warming_up_to_rl.task_app.grpo_crafter", module_path
+    )
+    if spec is None or spec.loader is None:
+        raise ImportError(f"Could not load task app module at {module_path}")
+    module = importlib.util.module_from_spec(spec)
+    spec.loader.exec_module(module)
+    return getattr(module, "build_config")
+
+
+build_config = _load_build_config()
 
 
 APP_ID = "grpo-crafter"
 
 
-_BASE_CONFIG = build_config()
-TASK_APP_CONFIG = TaskAppConfig(
-    app_id="grpo-crafter",
-    name=_BASE_CONFIG.name,
-    description=_BASE_CONFIG.description,
-    base_task_info=_BASE_CONFIG.base_task_info,
-    describe_taskset=_BASE_CONFIG.describe_taskset,
-    provide_task_instances=_BASE_CONFIG.provide_task_instances,
-    rollout=_BASE_CONFIG.rollout,
-    dataset_registry=_BASE_CONFIG.dataset_registry,
-    rubrics=_BASE_CONFIG.rubrics,
-    proxy=_BASE_CONFIG.proxy,
-    routers=_BASE_CONFIG.routers,
-    middleware=_BASE_CONFIG.middleware,
-    app_state=_BASE_CONFIG.app_state,
-    require_api_key=_BASE_CONFIG.require_api_key,
-    expose_debug_env=_BASE_CONFIG.expose_debug_env,
-    cors_origins=_BASE_CONFIG.cors_origins,
-    startup_hooks=_BASE_CONFIG.startup_hooks,
-    shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
-)
+def _build_base_config() -> TaskAppConfig:
+    # Lazily construct the base config to avoid heavy work at import time
+    return build_config()
+
 
 try:
     _REGISTERED_ENTRY = registry.get(APP_ID)
@@ -60,7 +67,8 @@ else:
 def build_task_app_config() -> TaskAppConfig:
     """Return a fresh TaskAppConfig for this wrapper."""
 
-    return TASK_APP_CONFIG.clone()
+    base = _build_base_config()
+    return base.clone()
 
 
 def fastapi_app():
@@ -6,6 +6,7 @@ This module provides a local fallback for install_problem_bank_into_shared so
 the modal task app can import it without requiring an external math_rl package.
 """
 
+
 def install_problem_bank_into_shared() -> None:
     """No-op placeholder for installing the Hendrycks MATH problem bank.
 
@@ -13,5 +14,3 @@ def install_problem_bank_into_shared() -> None:
     into a shared directory. For the demo scaffold, it is a no-op.
     """
     return None
-
-
@@ -56,5 +56,3 @@ def deploy(script_path: Optional[str] = None, *, env_api_key: Optional[str] = No
             f"No deploy script provided and Python-based deploy failed: {e}. "
             "Pass --script /path/to/deploy_task_app.sh to demo.deploy."
         )
-
-
@@ -25,7 +25,9 @@ _SYNTH_HOSTED = None
 try:
     probe = _HERE
     for _ in range(8):
-        candidate = (probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted").resolve()
+        candidate = (
+            probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted"
+        ).resolve()
         if candidate.exists():
             _SYNTH_HOSTED = candidate
             break
@@ -101,12 +103,14 @@ def fastapi_app():
     from fastapi import FastAPI
     from fastapi.middleware.cors import CORSMiddleware
     from fastapi.responses import JSONResponse
+
     try:
         from synth_ai.task.auth import (
             is_api_key_header_authorized,
             normalize_environment_api_key,
         )
     except Exception:  # pragma: no cover - fallback for older synth-ai builds
+
         def _normalize_env_key_fallback() -> str | None:
             key = os.getenv("ENVIRONMENT_API_KEY")
             if key:
@@ -130,7 +134,7 @@ def fastapi_app():
         for value in values:
             if not isinstance(value, str):
                 continue
-            for chunk in value.split(','):
+            for chunk in value.split(","):
                 chunk = chunk.strip()
                 if chunk:
                     parts.append(chunk)
@@ -172,19 +176,27 @@ def fastapi_app():
 
     def _normalize_answer_text(s: str) -> str:
         import re as _re
+
         return _re.sub(r"[^0-9A-Za-z.+\-/*=]", "", (s or "").strip()).lower()
 
     def _extract_boxed(s: str) -> str:
         import re as _re
+
         m = list(_re.finditer(r"\\boxed\{([^}]+)\}", s or ""))
         return m[-1].group(1) if m else ""
 
     def _load_hendrycks_problem(seed: int, subject: str | None = None) -> tuple[str, str]:
         subj = subject or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
-        ds = _hf_split(subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE"))
+        ds = _hf_split(
+            subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE")
+        )
         n = len(ds) if hasattr(ds, "__len__") else 0
         if n == 0 and subject not in {"", "default"}:
-            ds = _hf_split("default", os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE"))
+            ds = _hf_split(
+                "default",
+                os.getenv("HENDRYCKS_MATH_SPLIT", "test"),
+                os.getenv("HENDRYCKS_MATH_SLICE"),
+            )
             n = len(ds) if hasattr(ds, "__len__") else 0
         if n == 0:
             raise RuntimeError("Hendrycks MATH dataset loaded empty")
@@ -225,7 +237,11 @@ def fastapi_app():
 
     def _resolve_env_keys() -> set[str]:
         keys: set[str] = set()
-        for alias in ("ENVIRONMENT_API_KEY", "dev_environment_api_key", "DEV_ENVIRONMENT_API_KEY"):
+        for alias in (
+            "ENVIRONMENT_API_KEY",
+            "dev_environment_api_key",
+            "DEV_ENVIRONMENT_API_KEY",
+        ):
             value = os.environ.get(alias)
             if value:
                 os.environ.setdefault("ENVIRONMENT_API_KEY", value)
@@ -250,8 +266,12 @@ def fastapi_app():
             candidates.append(primary.strip())
         secondary = x_api_keys or headers.get("x-api-keys")
         if secondary:
-            candidates.extend([value.strip() for value in secondary.split(",") if value.strip()])
-        auth_header = authorization or headers.get("authorization") or headers.get("Authorization")
+            candidates.extend(
+                [value.strip() for value in secondary.split(",") if value.strip()]
+            )
+        auth_header = (
+            authorization or headers.get("authorization") or headers.get("Authorization")
+        )
         if auth_header and auth_header.lower().startswith("bearer "):
             token = auth_header.split(" ", 1)[1].strip()
             if token:
@@ -274,7 +294,10 @@ def fastapi_app():
     async def info():
         return {
             "service": {"base_url": os.getenv("SERVICE_BASE_URL", "")},
-            "inference": {"base_url": "", "endpoints": {"chat_completions": "/v1/chat/completions"}},
+            "inference": {
+                "base_url": "",
+                "endpoints": {"chat_completions": "/v1/chat/completions"},
+            },
         }
 
     @app.get("/health")
  @app.get("/health")
@@ -282,7 +305,10 @@ def fastapi_app():
282
305
  env_keys = _resolve_env_keys()
283
306
  env_key = next(iter(env_keys), None)
284
307
  if not env_key:
285
- return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
308
+ return JSONResponse(
309
+ status_code=503,
310
+ content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
311
+ )
286
312
  # Authorize using all header variants; avoid typed Header params to prevent 422s
287
313
  authorized = is_api_key_header_authorized(request)
288
314
  if not authorized:
@@ -302,7 +328,10 @@ def fastapi_app():
         env_keys = _resolve_env_keys()
         env_key = next(iter(env_keys), None)
         if not env_key:
-            return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
+            return JSONResponse(
+                status_code=503,
+                content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
+            )
         authorized = is_api_key_header_authorized(request)
         if not authorized:
             prefix = _log_env_key_prefix("health/rollout", env_key)
@@ -321,17 +350,22 @@ def fastapi_app():
     async def task_info(seed: int = 0, subject: str = "default"):
         """Return Hendrycks MATH problem/answer and tool schema for a seed."""
         q, a = _load_hendrycks_problem(int(seed), subject=subject)
-        tools = [{
-            "name": "submit_answer",
-            "description": "Provide the final numerical or algebraic answer for the current math problem.",
-            "parameters": {
-                "type": "object",
-                "properties": {
-                    "answer": {"type": "string", "description": "The proposed final answer"},
+        tools = [
+            {
+                "name": "submit_answer",
+                "description": "Provide the final numerical or algebraic answer for the current math problem.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {
+                        "answer": {
+                            "type": "string",
+                            "description": "The proposed final answer",
+                        },
+                    },
+                    "required": ["answer"],
                 },
-            "required": ["answer"],
-            },
-        }]
+            }
+        ]
         return {
             "seed": int(seed),
             "subject": subject,
@@ -363,7 +397,9 @@ def fastapi_app():
             print("[422] validation", snapshot, flush=True)
         except Exception:
             pass
-        return JSONResponse(status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]})
+        return JSONResponse(
+            status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
+        )
 
     @api.get("/")
     async def root_probe():
@@ -381,7 +417,12 @@ def fastapi_app():
     if not env_key:
         raise RuntimeError("ENVIRONMENT_API_KEY missing in task app environment")
 
-    OPENAI_REMOVE_FIELDS = ("stop_after_tool_calls", "thinking_mode", "thinking_budget", "reasoning")
+    OPENAI_REMOVE_FIELDS = (
+        "stop_after_tool_calls",
+        "thinking_mode",
+        "thinking_budget",
+        "reasoning",
+    )
     OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
     TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "submit_answer"}}
 
@@ -404,12 +445,18 @@ def fastapi_app():
     def proxy_chat_completions(request: dict[str, object] = Body(...)):
         key = os.environ.get("OPENAI_API_KEY")
         if not key:
-            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing")
+            raise HTTPException(
+                status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing"
+            )
         model = request.get("model") if isinstance(request, dict) else None
-        payload = _prepare_openai_payload(model if isinstance(model, str) else None, request if isinstance(request, dict) else {})
+        payload = _prepare_openai_payload(
+            model if isinstance(model, str) else None, request if isinstance(request, dict) else {}
+        )
         headers = {"Authorization": f"Bearer {key}"}
         with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
-            resp = client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers)
+            resp = client.post(
+                "https://api.openai.com/v1/chat/completions", json=payload, headers=headers
+            )
             try:
                 data = resp.json()
             except Exception:
442
489
  env_cfg = (env or {}).get("config") or {}
443
490
  # Prefer env.seed; fall back to env.config.seed -> default 0
444
491
  try:
445
- seed_val = int((env or {}).get("seed")) if isinstance(env, dict) and (env or {}).get("seed") is not None else 0
492
+ seed_val = (
493
+ int((env or {}).get("seed"))
494
+ if isinstance(env, dict) and (env or {}).get("seed") is not None
495
+ else 0
496
+ )
446
497
  except Exception:
447
498
  seed_val = 0
448
499
  if seed_val == 0:
449
500
  try:
450
- seed_val = int(env_cfg.get("seed")) if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None else 0
501
+ seed_val = (
502
+ int(env_cfg.get("seed"))
503
+ if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None
504
+ else 0
505
+ )
451
506
  except Exception:
452
507
  seed_val = 0
453
- subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
508
+ subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv(
509
+ "HENDRYCKS_MATH_CONFIG", "default"
510
+ )
454
511
  # Load real Hendrycks problem text/solution (download if necessary). Crash on failure.
455
512
  qh, ah = _load_hendrycks_problem(seed_val, subject=subject)
456
513
  question = qh
@@ -468,7 +525,10 @@ def fastapi_app():
             sanitized.pop("max_tokens", None)
             for field in ("temperature", "top_p"):
                 sanitized.pop(field, None)
-            sanitized["tool_choice"] = {"type": "function", "function": {"name": "submit_answer"}}
+            sanitized["tool_choice"] = {
+                "type": "function",
+                "function": {"name": "submit_answer"},
+            }
             sanitized["parallel_tool_calls"] = False
         return sanitized
 
@@ -509,19 +569,21 @@ def fastapi_app():
         payload = {
             "model": model,
             "messages": [{"role": "user", "content": user_prompt}],
-            "tools": [{
-                "type": "function",
-                "function": {
-                    "name": "submit_answer",
-                    "parameters": {
-                        "type": "object",
-                        "properties": {
-                            "answer": {"type": "string"},
+            "tools": [
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "submit_answer",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "answer": {"type": "string"},
+                            },
+                            "required": ["answer"],
                         },
-                    "required": ["answer"],
                     },
-            },
-        }],
+                }
+            ],
             "max_tokens": 256,
             "temperature": 0.2,
         }
@@ -529,7 +591,7 @@ def fastapi_app():
 
         try:
             tool_names = []
-            for t in (payload.get("tools") or []):
+            for t in payload.get("tools") or []:
                 if isinstance(t, dict):
                     fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
                     name = fn.get("name")
@@ -547,7 +609,9 @@ def fastapi_app():
         if sk:
             headers["Authorization"] = f"Bearer {sk}"
         with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
-            resp = client.post(f"{inference_url}/v1/chat/completions", json=to_send, headers=headers)
+            resp = client.post(
+                f"{inference_url}/v1/chat/completions", json=to_send, headers=headers
+            )
             try:
                 data = resp.json()
             except Exception:
@@ -580,14 +644,21 @@ def fastapi_app():
 
         tool_answer = _parse_tool_answer(data)
         history.append({"answer": tool_answer})
-        steps.append({
-            "obs": {},
-            "tool_calls": [{"tool_name": "submit_answer", "arguments": _json.dumps({"answer": tool_answer})}],
-            "reward": None,
-            "done": False,
-            "truncated": False,
-            "info": None,
-        })
+        steps.append(
+            {
+                "obs": {},
+                "tool_calls": [
+                    {
+                        "tool_name": "submit_answer",
+                        "arguments": _json.dumps({"answer": tool_answer}),
+                    }
+                ],
+                "reward": None,
+                "done": False,
+                "truncated": False,
+                "info": None,
+            }
+        )
 
         # Evaluate answer correctness using tool output (or fall back to assistant text)
         reward_val = 0.0
@@ -605,25 +676,57 @@ def fastapi_app():
         except Exception:
             reward_val = 0.0
 
+        # Immediate, concise rollout logging mirroring RL format
+        try:
+            preview = tool_answer[:120] + (
+                "…" if isinstance(tool_answer, str) and len(tool_answer) > 120 else ""
+            )
+            components = {
+                "env": float(reward_val),
+                "rubric_event": 1.0 if bool(tool_answer.strip()) else 0.0,
+                "rubric_outcome": 1.0 if float(reward_val) > 0.0 else 0.0,
+            }
+            print(
+                "[MATH_ROLLOUT] run=",
+                run_id,
+                " seed=",
+                seed_val,
+                " subject=",
+                subject,
+                " tool=submit_answer answer=",
+                preview,
+                " reward=",
+                float(reward_val),
+                " components=",
+                components,
+                flush=True,
+            )
+        except Exception:
+            pass
+
         total_reward += float(reward_val)
-        steps.append({
-            "obs": {},
-            "tool_calls": [],
-            "reward": reward_val,
-            "done": True,
-            "truncated": False,
-            "info": None,
-        })
+        steps.append(
+            {
+                "obs": {},
+                "tool_calls": [],
+                "reward": reward_val,
+                "done": True,
+                "truncated": False,
+                "info": None,
+            }
+        )
 
         return {
             "run_id": run_id,
-            "trajectories": [{
-                "env_id": env_name,
-                "policy_id": (policy or {}).get("policy_name") or "math-react",
-                "steps": steps,
-                "final": {"observation": {}},
-                "length": len(steps),
-            }],
+            "trajectories": [
+                {
+                    "env_id": env_name,
+                    "policy_id": (policy or {}).get("policy_name") or "math-react",
+                    "steps": steps,
+                    "final": {"observation": {}},
+                    "length": len(steps),
+                }
+            ],
             "branches": {},
             "metrics": {
                 "episode_returns": [total_reward],
@@ -36,4 +36,3 @@ register_task_app(
         modal=DEMO_MODAL_CONFIG,
     )
 )
-