synth-ai 0.2.9.dev5__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/common_old/backend.py +0 -1
- examples/crafter_debug_render.py +15 -6
- examples/evals_old/compare_models.py +1 -0
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
- examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
- examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
- examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
- examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
- examples/finetuning_old/synth_qwen_v1/util.py +7 -2
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +17 -15
- examples/rl/run_rl_and_save.py +24 -7
- examples/rl/task_app/math_single_step.py +128 -11
- examples/rl/task_app/math_task_app.py +11 -3
- examples/rl_old/task_app.py +222 -53
- examples/warming_up_to_rl/analyze_trace_db.py +7 -5
- examples/warming_up_to_rl/export_trace_sft.py +141 -16
- examples/warming_up_to_rl/groq_test.py +11 -4
- examples/warming_up_to_rl/manage_secrets.py +15 -6
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +108 -30
- examples/warming_up_to_rl/run_fft_and_save.py +128 -52
- examples/warming_up_to_rl/run_local_rollout.py +87 -36
- examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
- examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
- examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
- examples/warming_up_to_rl/run_rl_and_save.py +31 -7
- examples/warming_up_to_rl/run_rollout_remote.py +37 -10
- examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
- synth_ai/__init__.py +1 -0
- synth_ai/api/train/builders.py +34 -10
- synth_ai/api/train/cli.py +172 -32
- synth_ai/api/train/config_finder.py +59 -4
- synth_ai/api/train/env_resolver.py +32 -14
- synth_ai/api/train/pollers.py +11 -3
- synth_ai/api/train/task_app.py +4 -1
- synth_ai/api/train/utils.py +20 -4
- synth_ai/cli/__init__.py +11 -4
- synth_ai/cli/balance.py +1 -1
- synth_ai/cli/demo.py +19 -5
- synth_ai/cli/rl_demo.py +75 -16
- synth_ai/cli/root.py +116 -37
- synth_ai/cli/task_apps.py +1276 -186
- synth_ai/cli/traces.py +1 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +67 -30
- synth_ai/demos/core/cli.py +493 -164
- synth_ai/demos/demo_task_apps/core.py +50 -6
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/reproducibility/tree.py +3 -1
- synth_ai/environments/service/core_routes.py +6 -2
- synth_ai/evals/base.py +0 -2
- synth_ai/experimental/synth_oss.py +11 -12
- synth_ai/handshake.py +3 -1
- synth_ai/http_client.py +31 -7
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +8 -4
- synth_ai/jobs/client.py +40 -10
- synth_ai/learning/client.py +33 -8
- synth_ai/learning/config.py +0 -2
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +6 -3
- synth_ai/learning/health.py +9 -2
- synth_ai/learning/jobs.py +17 -5
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
- synth_ai/learning/prompts/random_search.py +4 -1
- synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
- synth_ai/learning/rl_client.py +42 -14
- synth_ai/learning/sse.py +0 -2
- synth_ai/learning/validators.py +6 -2
- synth_ai/lm/caching/ephemeral.py +1 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +13 -1
- synth_ai/lm/core/synth_models.py +0 -1
- synth_ai/lm/core/vendor_clients.py +4 -2
- synth_ai/lm/overrides.py +2 -2
- synth_ai/lm/vendors/core/anthropic_api.py +7 -7
- synth_ai/lm/vendors/core/openai_api.py +2 -0
- synth_ai/lm/vendors/openai_standard.py +3 -1
- synth_ai/lm/vendors/openai_standard_responses.py +6 -3
- synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
- synth_ai/lm/vendors/synth_client.py +37 -10
- synth_ai/rl/__init__.py +0 -1
- synth_ai/rl/contracts.py +0 -2
- synth_ai/rl/env_keys.py +6 -1
- synth_ai/task/__init__.py +1 -0
- synth_ai/task/apps/__init__.py +11 -11
- synth_ai/task/auth.py +29 -17
- synth_ai/task/client.py +3 -1
- synth_ai/task/contracts.py +1 -0
- synth_ai/task/datasets.py +3 -1
- synth_ai/task/errors.py +3 -2
- synth_ai/task/health.py +0 -2
- synth_ai/task/json.py +0 -1
- synth_ai/task/proxy.py +2 -5
- synth_ai/task/rubrics.py +9 -3
- synth_ai/task/server.py +31 -5
- synth_ai/task/tracing_utils.py +8 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +0 -1
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +1 -0
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +2 -0
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +24 -3
- synth_ai/tracing_v3/storage/base.py +4 -1
- synth_ai/tracing_v3/storage/factory.py +0 -1
- synth_ai/tracing_v3/turso/manager.py +102 -38
- synth_ai/tracing_v3/turso/models.py +4 -1
- synth_ai/tracing_v3/utils.py +1 -0
- synth_ai/v0/tracing/upload.py +32 -135
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -154
- synth_ai/install_sqld.sh +0 -40
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev5.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
|
@@ -24,6 +24,7 @@ class DemoEnv:
|
|
|
24
24
|
task_app_name: str = ""
|
|
25
25
|
task_app_secret_name: str = DEFAULT_TASK_APP_SECRET_NAME
|
|
26
26
|
|
|
27
|
+
|
|
27
28
|
def _mask(value: str, keep: int = 4) -> str:
|
|
28
29
|
if not value:
|
|
29
30
|
return ""
|
|
@@ -121,6 +122,32 @@ def persist_env_api_key(key: str) -> None:
|
|
|
121
122
|
_write_state(data)
|
|
122
123
|
|
|
123
124
|
|
|
125
|
+
def persist_demo_dir(demo_dir: str) -> None:
|
|
126
|
+
"""Store the demo directory path for subsequent commands."""
|
|
127
|
+
data = _read_state()
|
|
128
|
+
data["DEMO_DIR"] = demo_dir
|
|
129
|
+
_write_state(data)
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
def load_demo_dir() -> str | None:
|
|
133
|
+
"""Load the stored demo directory path, if any."""
|
|
134
|
+
data = _read_state()
|
|
135
|
+
return data.get("DEMO_DIR")
|
|
136
|
+
|
|
137
|
+
|
|
138
|
+
def persist_env_file_path(env_path: str) -> None:
|
|
139
|
+
"""Store the .env file path for subsequent commands."""
|
|
140
|
+
data = _read_state()
|
|
141
|
+
data["ENV_FILE_PATH"] = env_path
|
|
142
|
+
_write_state(data)
|
|
143
|
+
|
|
144
|
+
|
|
145
|
+
def load_env_file_path() -> str | None:
|
|
146
|
+
"""Load the stored .env file path, if any."""
|
|
147
|
+
data = _read_state()
|
|
148
|
+
return data.get("ENV_FILE_PATH")
|
|
149
|
+
|
|
150
|
+
|
|
124
151
|
def modal_auth_status() -> Tuple[bool, str]:
|
|
125
152
|
"""Return (ok, message) describing Modal CLI credential status."""
|
|
126
153
|
|
|
@@ -192,7 +219,9 @@ def load_env() -> DemoEnv:
|
|
|
192
219
|
# Repo/package .envs (fallbacks)
|
|
193
220
|
repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
|
|
194
221
|
repo_env = load_dotenv_file(os.path.join(repo_root, ".env"))
|
|
195
|
-
pkg_env = load_dotenv_file(
|
|
222
|
+
pkg_env = load_dotenv_file(
|
|
223
|
+
os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env")
|
|
224
|
+
)
|
|
196
225
|
examples_env = load_dotenv_file(os.path.join(repo_root, "examples", "rl", ".env"))
|
|
197
226
|
|
|
198
227
|
state = _read_state()
|
|
@@ -241,7 +270,11 @@ def load_env() -> DemoEnv:
|
|
|
241
270
|
or str(state.get("SYNTH_API_KEY") or "")
|
|
242
271
|
)
|
|
243
272
|
if not synth_api_key:
|
|
244
|
-
mode =
|
|
273
|
+
mode = (
|
|
274
|
+
"prod"
|
|
275
|
+
if default_root in dev_url
|
|
276
|
+
else ("local" if ("localhost" in dev_url or "127.0.0.1" in dev_url) else "dev")
|
|
277
|
+
)
|
|
245
278
|
if mode == "prod":
|
|
246
279
|
synth_api_key = (
|
|
247
280
|
os_env.get("PROD_SYNTH_API_KEY")
|
|
@@ -310,7 +343,9 @@ def load_env() -> DemoEnv:
|
|
|
310
343
|
return env
|
|
311
344
|
|
|
312
345
|
|
|
313
|
-
def assert_http_ok(
|
|
346
|
+
def assert_http_ok(
|
|
347
|
+
url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0
|
|
348
|
+
) -> bool:
|
|
314
349
|
try:
|
|
315
350
|
import ssl
|
|
316
351
|
|
|
@@ -387,7 +422,14 @@ def persist_api_key(key: str) -> None:
|
|
|
387
422
|
_write_state(data)
|
|
388
423
|
|
|
389
424
|
|
|
390
|
-
def run_job(
|
|
425
|
+
def run_job(
|
|
426
|
+
env: DemoEnv,
|
|
427
|
+
config_toml_path: str,
|
|
428
|
+
*,
|
|
429
|
+
batch_size: Optional[int] = None,
|
|
430
|
+
group_size: Optional[int] = None,
|
|
431
|
+
model: Optional[str] = None,
|
|
432
|
+
) -> None:
|
|
391
433
|
"""Create and stream a short RL job using the backend API (placeholder: prints cURL to execute)."""
|
|
392
434
|
backend = env.dev_backend_url.rstrip("/")
|
|
393
435
|
if backend.endswith("/api"):
|
|
@@ -396,9 +438,11 @@ def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] =
|
|
|
396
438
|
api_base = backend + "/api"
|
|
397
439
|
print("\nTo create an RL job, run:")
|
|
398
440
|
print(
|
|
399
|
-
|
|
441
|
+
'curl -s -X POST "' + api_base + '/rl/jobs" '
|
|
400
442
|
"-H 'Content-Type: application/json' "
|
|
401
443
|
f"-H 'Authorization: Bearer {env.synth_api_key}' "
|
|
402
444
|
"-d '{" # intentionally not fully formed here for brevity in this scaffold
|
|
403
445
|
)
|
|
404
|
-
print(
|
|
446
|
+
print(
|
|
447
|
+
" NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events."
|
|
448
|
+
)
|
|
@@ -8,8 +8,7 @@ variety = "fft"
|
|
|
8
8
|
|
|
9
9
|
[job]
|
|
10
10
|
model = "Qwen/Qwen3-4B"
|
|
11
|
-
|
|
12
|
-
# data = "../ft_data/qwen3_32b_ach_ge3_raw_filtered.head100.jsonl"
|
|
11
|
+
data = "ft_data/crafter_traces.jsonl"
|
|
13
12
|
|
|
14
13
|
[compute]
|
|
15
14
|
# Adjust as needed for your quota
|
|
@@ -23,7 +22,7 @@ topology = {}
|
|
|
23
22
|
|
|
24
23
|
# Optional local validation dataset path (JSONL). If set, the client will upload
|
|
25
24
|
# this file and wire up validation so the frontend can display val.loss.
|
|
26
|
-
validation_path = "../ft_data/
|
|
25
|
+
# validation_path = "../ft_data/crafter_validation.jsonl"
|
|
27
26
|
|
|
28
27
|
[training]
|
|
29
28
|
mode = "sft_offline"
|
|
@@ -1,10 +1,9 @@
|
|
|
1
|
-
|
|
2
1
|
"""Compatibility wrapper for the GRPO Crafter task app.
|
|
3
2
|
|
|
4
|
-
This module now delegates to the
|
|
5
|
-
`
|
|
6
|
-
file directly or targeting `fastapi_app` from external tooling).
|
|
7
|
-
`uvx synth-ai serve grpo-crafter` for local development and testing.
|
|
3
|
+
This module now delegates to the TaskAppConfig defined in the local example at
|
|
4
|
+
`examples/warming_up_to_rl/task_app/grpo_crafter.py`. It is kept for legacy usage
|
|
5
|
+
(running the file directly or targeting `fastapi_app` from external tooling).
|
|
6
|
+
Prefer using `uvx synth-ai serve grpo-crafter` for local development and testing.
|
|
8
7
|
"""
|
|
9
8
|
|
|
10
9
|
from __future__ import annotations
|
|
@@ -17,35 +16,43 @@ from fastapi.responses import JSONResponse
|
|
|
17
16
|
from starlette.requests import Request
|
|
18
17
|
|
|
19
18
|
from synth_ai.task.apps import ModalDeploymentConfig, registry
|
|
20
|
-
from synth_ai.task.apps.grpo_crafter import build_config
|
|
21
19
|
from synth_ai.task.auth import is_api_key_header_authorized, normalize_environment_api_key
|
|
22
20
|
from synth_ai.task.server import TaskAppConfig, create_task_app, run_task_app
|
|
21
|
+
import importlib.util
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _load_build_config():
|
|
25
|
+
# Find synth_ai package location to locate examples/
|
|
26
|
+
import synth_ai
|
|
27
|
+
|
|
28
|
+
synth_ai_path = Path(synth_ai.__file__).resolve().parent.parent
|
|
29
|
+
module_path = synth_ai_path / "examples" / "warming_up_to_rl" / "task_app" / "grpo_crafter.py"
|
|
30
|
+
|
|
31
|
+
if not module_path.exists():
|
|
32
|
+
raise ImportError(
|
|
33
|
+
f"Could not find task app module at {module_path}. Make sure you're running from the synth-ai repository."
|
|
34
|
+
)
|
|
35
|
+
|
|
36
|
+
spec = importlib.util.spec_from_file_location(
|
|
37
|
+
"warming_up_to_rl.task_app.grpo_crafter", module_path
|
|
38
|
+
)
|
|
39
|
+
if spec is None or spec.loader is None:
|
|
40
|
+
raise ImportError(f"Could not load task app module at {module_path}")
|
|
41
|
+
module = importlib.util.module_from_spec(spec)
|
|
42
|
+
spec.loader.exec_module(module)
|
|
43
|
+
return getattr(module, "build_config")
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
build_config = _load_build_config()
|
|
23
47
|
|
|
24
48
|
|
|
25
49
|
APP_ID = "grpo-crafter"
|
|
26
50
|
|
|
27
51
|
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
description=_BASE_CONFIG.description,
|
|
33
|
-
base_task_info=_BASE_CONFIG.base_task_info,
|
|
34
|
-
describe_taskset=_BASE_CONFIG.describe_taskset,
|
|
35
|
-
provide_task_instances=_BASE_CONFIG.provide_task_instances,
|
|
36
|
-
rollout=_BASE_CONFIG.rollout,
|
|
37
|
-
dataset_registry=_BASE_CONFIG.dataset_registry,
|
|
38
|
-
rubrics=_BASE_CONFIG.rubrics,
|
|
39
|
-
proxy=_BASE_CONFIG.proxy,
|
|
40
|
-
routers=_BASE_CONFIG.routers,
|
|
41
|
-
middleware=_BASE_CONFIG.middleware,
|
|
42
|
-
app_state=_BASE_CONFIG.app_state,
|
|
43
|
-
require_api_key=_BASE_CONFIG.require_api_key,
|
|
44
|
-
expose_debug_env=_BASE_CONFIG.expose_debug_env,
|
|
45
|
-
cors_origins=_BASE_CONFIG.cors_origins,
|
|
46
|
-
startup_hooks=_BASE_CONFIG.startup_hooks,
|
|
47
|
-
shutdown_hooks=_BASE_CONFIG.shutdown_hooks,
|
|
48
|
-
)
|
|
52
|
+
def _build_base_config() -> TaskAppConfig:
|
|
53
|
+
# Lazily construct the base config to avoid heavy work at import time
|
|
54
|
+
return build_config()
|
|
55
|
+
|
|
49
56
|
|
|
50
57
|
try:
|
|
51
58
|
_REGISTERED_ENTRY = registry.get(APP_ID)
|
|
@@ -60,7 +67,8 @@ else:
|
|
|
60
67
|
def build_task_app_config() -> TaskAppConfig:
|
|
61
68
|
"""Return a fresh TaskAppConfig for this wrapper."""
|
|
62
69
|
|
|
63
|
-
|
|
70
|
+
base = _build_base_config()
|
|
71
|
+
return base.clone()
|
|
64
72
|
|
|
65
73
|
|
|
66
74
|
def fastapi_app():
|
|
@@ -6,6 +6,7 @@ This module provides a local fallback for install_problem_bank_into_shared so
|
|
|
6
6
|
the modal task app can import it without requiring an external math_rl package.
|
|
7
7
|
"""
|
|
8
8
|
|
|
9
|
+
|
|
9
10
|
def install_problem_bank_into_shared() -> None:
|
|
10
11
|
"""No-op placeholder for installing the Hendrycks MATH problem bank.
|
|
11
12
|
|
|
@@ -13,5 +14,3 @@ def install_problem_bank_into_shared() -> None:
|
|
|
13
14
|
into a shared directory. For the demo scaffold, it is a no-op.
|
|
14
15
|
"""
|
|
15
16
|
return None
|
|
16
|
-
|
|
17
|
-
|
|
@@ -25,7 +25,9 @@ _SYNTH_HOSTED = None
|
|
|
25
25
|
try:
|
|
26
26
|
probe = _HERE
|
|
27
27
|
for _ in range(8):
|
|
28
|
-
candidate = (
|
|
28
|
+
candidate = (
|
|
29
|
+
probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted"
|
|
30
|
+
).resolve()
|
|
29
31
|
if candidate.exists():
|
|
30
32
|
_SYNTH_HOSTED = candidate
|
|
31
33
|
break
|
|
@@ -101,12 +103,14 @@ def fastapi_app():
|
|
|
101
103
|
from fastapi import FastAPI
|
|
102
104
|
from fastapi.middleware.cors import CORSMiddleware
|
|
103
105
|
from fastapi.responses import JSONResponse
|
|
106
|
+
|
|
104
107
|
try:
|
|
105
108
|
from synth_ai.task.auth import (
|
|
106
109
|
is_api_key_header_authorized,
|
|
107
110
|
normalize_environment_api_key,
|
|
108
111
|
)
|
|
109
112
|
except Exception: # pragma: no cover - fallback for older synth-ai builds
|
|
113
|
+
|
|
110
114
|
def _normalize_env_key_fallback() -> str | None:
|
|
111
115
|
key = os.getenv("ENVIRONMENT_API_KEY")
|
|
112
116
|
if key:
|
|
@@ -130,7 +134,7 @@ def fastapi_app():
|
|
|
130
134
|
for value in values:
|
|
131
135
|
if not isinstance(value, str):
|
|
132
136
|
continue
|
|
133
|
-
for chunk in value.split(
|
|
137
|
+
for chunk in value.split(","):
|
|
134
138
|
chunk = chunk.strip()
|
|
135
139
|
if chunk:
|
|
136
140
|
parts.append(chunk)
|
|
@@ -172,19 +176,27 @@ def fastapi_app():
|
|
|
172
176
|
|
|
173
177
|
def _normalize_answer_text(s: str) -> str:
|
|
174
178
|
import re as _re
|
|
179
|
+
|
|
175
180
|
return _re.sub(r"[^0-9A-Za-z.+\-/*=]", "", (s or "").strip()).lower()
|
|
176
181
|
|
|
177
182
|
def _extract_boxed(s: str) -> str:
|
|
178
183
|
import re as _re
|
|
184
|
+
|
|
179
185
|
m = list(_re.finditer(r"\\boxed\{([^}]+)\}", s or ""))
|
|
180
186
|
return m[-1].group(1) if m else ""
|
|
181
187
|
|
|
182
188
|
def _load_hendrycks_problem(seed: int, subject: str | None = None) -> tuple[str, str]:
|
|
183
189
|
subj = subject or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
|
|
184
|
-
ds = _hf_split(
|
|
190
|
+
ds = _hf_split(
|
|
191
|
+
subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE")
|
|
192
|
+
)
|
|
185
193
|
n = len(ds) if hasattr(ds, "__len__") else 0
|
|
186
194
|
if n == 0 and subject not in {"", "default"}:
|
|
187
|
-
ds = _hf_split(
|
|
195
|
+
ds = _hf_split(
|
|
196
|
+
"default",
|
|
197
|
+
os.getenv("HENDRYCKS_MATH_SPLIT", "test"),
|
|
198
|
+
os.getenv("HENDRYCKS_MATH_SLICE"),
|
|
199
|
+
)
|
|
188
200
|
n = len(ds) if hasattr(ds, "__len__") else 0
|
|
189
201
|
if n == 0:
|
|
190
202
|
raise RuntimeError("Hendrycks MATH dataset loaded empty")
|
|
@@ -225,7 +237,11 @@ def fastapi_app():
|
|
|
225
237
|
|
|
226
238
|
def _resolve_env_keys() -> set[str]:
|
|
227
239
|
keys: set[str] = set()
|
|
228
|
-
for alias in (
|
|
240
|
+
for alias in (
|
|
241
|
+
"ENVIRONMENT_API_KEY",
|
|
242
|
+
"dev_environment_api_key",
|
|
243
|
+
"DEV_ENVIRONMENT_API_KEY",
|
|
244
|
+
):
|
|
229
245
|
value = os.environ.get(alias)
|
|
230
246
|
if value:
|
|
231
247
|
os.environ.setdefault("ENVIRONMENT_API_KEY", value)
|
|
@@ -250,8 +266,12 @@ def fastapi_app():
|
|
|
250
266
|
candidates.append(primary.strip())
|
|
251
267
|
secondary = x_api_keys or headers.get("x-api-keys")
|
|
252
268
|
if secondary:
|
|
253
|
-
candidates.extend(
|
|
254
|
-
|
|
269
|
+
candidates.extend(
|
|
270
|
+
[value.strip() for value in secondary.split(",") if value.strip()]
|
|
271
|
+
)
|
|
272
|
+
auth_header = (
|
|
273
|
+
authorization or headers.get("authorization") or headers.get("Authorization")
|
|
274
|
+
)
|
|
255
275
|
if auth_header and auth_header.lower().startswith("bearer "):
|
|
256
276
|
token = auth_header.split(" ", 1)[1].strip()
|
|
257
277
|
if token:
|
|
@@ -274,7 +294,10 @@ def fastapi_app():
|
|
|
274
294
|
async def info():
|
|
275
295
|
return {
|
|
276
296
|
"service": {"base_url": os.getenv("SERVICE_BASE_URL", "")},
|
|
277
|
-
"inference": {
|
|
297
|
+
"inference": {
|
|
298
|
+
"base_url": "",
|
|
299
|
+
"endpoints": {"chat_completions": "/v1/chat/completions"},
|
|
300
|
+
},
|
|
278
301
|
}
|
|
279
302
|
|
|
280
303
|
@app.get("/health")
|
|
@@ -282,7 +305,10 @@ def fastapi_app():
|
|
|
282
305
|
env_keys = _resolve_env_keys()
|
|
283
306
|
env_key = next(iter(env_keys), None)
|
|
284
307
|
if not env_key:
|
|
285
|
-
return JSONResponse(
|
|
308
|
+
return JSONResponse(
|
|
309
|
+
status_code=503,
|
|
310
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
311
|
+
)
|
|
286
312
|
# Authorize using all header variants; avoid typed Header params to prevent 422s
|
|
287
313
|
authorized = is_api_key_header_authorized(request)
|
|
288
314
|
if not authorized:
|
|
@@ -302,7 +328,10 @@ def fastapi_app():
|
|
|
302
328
|
env_keys = _resolve_env_keys()
|
|
303
329
|
env_key = next(iter(env_keys), None)
|
|
304
330
|
if not env_key:
|
|
305
|
-
return JSONResponse(
|
|
331
|
+
return JSONResponse(
|
|
332
|
+
status_code=503,
|
|
333
|
+
content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"},
|
|
334
|
+
)
|
|
306
335
|
authorized = is_api_key_header_authorized(request)
|
|
307
336
|
if not authorized:
|
|
308
337
|
prefix = _log_env_key_prefix("health/rollout", env_key)
|
|
@@ -321,17 +350,22 @@ def fastapi_app():
|
|
|
321
350
|
async def task_info(seed: int = 0, subject: str = "default"):
|
|
322
351
|
"""Return Hendrycks MATH problem/answer and tool schema for a seed."""
|
|
323
352
|
q, a = _load_hendrycks_problem(int(seed), subject=subject)
|
|
324
|
-
tools = [
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
"
|
|
329
|
-
|
|
330
|
-
"
|
|
353
|
+
tools = [
|
|
354
|
+
{
|
|
355
|
+
"name": "submit_answer",
|
|
356
|
+
"description": "Provide the final numerical or algebraic answer for the current math problem.",
|
|
357
|
+
"parameters": {
|
|
358
|
+
"type": "object",
|
|
359
|
+
"properties": {
|
|
360
|
+
"answer": {
|
|
361
|
+
"type": "string",
|
|
362
|
+
"description": "The proposed final answer",
|
|
363
|
+
},
|
|
364
|
+
},
|
|
365
|
+
"required": ["answer"],
|
|
331
366
|
},
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
}]
|
|
367
|
+
}
|
|
368
|
+
]
|
|
335
369
|
return {
|
|
336
370
|
"seed": int(seed),
|
|
337
371
|
"subject": subject,
|
|
@@ -363,7 +397,9 @@ def fastapi_app():
|
|
|
363
397
|
print("[422] validation", snapshot, flush=True)
|
|
364
398
|
except Exception:
|
|
365
399
|
pass
|
|
366
|
-
return JSONResponse(
|
|
400
|
+
return JSONResponse(
|
|
401
|
+
status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
|
|
402
|
+
)
|
|
367
403
|
|
|
368
404
|
@api.get("/")
|
|
369
405
|
async def root_probe():
|
|
@@ -381,7 +417,12 @@ def fastapi_app():
|
|
|
381
417
|
if not env_key:
|
|
382
418
|
raise RuntimeError("ENVIRONMENT_API_KEY missing in task app environment")
|
|
383
419
|
|
|
384
|
-
OPENAI_REMOVE_FIELDS = (
|
|
420
|
+
OPENAI_REMOVE_FIELDS = (
|
|
421
|
+
"stop_after_tool_calls",
|
|
422
|
+
"thinking_mode",
|
|
423
|
+
"thinking_budget",
|
|
424
|
+
"reasoning",
|
|
425
|
+
)
|
|
385
426
|
OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
|
|
386
427
|
TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "submit_answer"}}
|
|
387
428
|
|
|
@@ -404,12 +445,18 @@ def fastapi_app():
|
|
|
404
445
|
def proxy_chat_completions(request: dict[str, object] = Body(...)):
|
|
405
446
|
key = os.environ.get("OPENAI_API_KEY")
|
|
406
447
|
if not key:
|
|
407
|
-
raise HTTPException(
|
|
448
|
+
raise HTTPException(
|
|
449
|
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing"
|
|
450
|
+
)
|
|
408
451
|
model = request.get("model") if isinstance(request, dict) else None
|
|
409
|
-
payload = _prepare_openai_payload(
|
|
452
|
+
payload = _prepare_openai_payload(
|
|
453
|
+
model if isinstance(model, str) else None, request if isinstance(request, dict) else {}
|
|
454
|
+
)
|
|
410
455
|
headers = {"Authorization": f"Bearer {key}"}
|
|
411
456
|
with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
|
|
412
|
-
resp = client.post(
|
|
457
|
+
resp = client.post(
|
|
458
|
+
"https://api.openai.com/v1/chat/completions", json=payload, headers=headers
|
|
459
|
+
)
|
|
413
460
|
try:
|
|
414
461
|
data = resp.json()
|
|
415
462
|
except Exception:
|
|
@@ -442,15 +489,25 @@ def fastapi_app():
|
|
|
442
489
|
env_cfg = (env or {}).get("config") or {}
|
|
443
490
|
# Prefer env.seed; fall back to env.config.seed -> default 0
|
|
444
491
|
try:
|
|
445
|
-
seed_val =
|
|
492
|
+
seed_val = (
|
|
493
|
+
int((env or {}).get("seed"))
|
|
494
|
+
if isinstance(env, dict) and (env or {}).get("seed") is not None
|
|
495
|
+
else 0
|
|
496
|
+
)
|
|
446
497
|
except Exception:
|
|
447
498
|
seed_val = 0
|
|
448
499
|
if seed_val == 0:
|
|
449
500
|
try:
|
|
450
|
-
seed_val =
|
|
501
|
+
seed_val = (
|
|
502
|
+
int(env_cfg.get("seed"))
|
|
503
|
+
if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None
|
|
504
|
+
else 0
|
|
505
|
+
)
|
|
451
506
|
except Exception:
|
|
452
507
|
seed_val = 0
|
|
453
|
-
subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv(
|
|
508
|
+
subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv(
|
|
509
|
+
"HENDRYCKS_MATH_CONFIG", "default"
|
|
510
|
+
)
|
|
454
511
|
# Load real Hendrycks problem text/solution (download if necessary). Crash on failure.
|
|
455
512
|
qh, ah = _load_hendrycks_problem(seed_val, subject=subject)
|
|
456
513
|
question = qh
|
|
@@ -468,7 +525,10 @@ def fastapi_app():
|
|
|
468
525
|
sanitized.pop("max_tokens", None)
|
|
469
526
|
for field in ("temperature", "top_p"):
|
|
470
527
|
sanitized.pop(field, None)
|
|
471
|
-
sanitized["tool_choice"] = {
|
|
528
|
+
sanitized["tool_choice"] = {
|
|
529
|
+
"type": "function",
|
|
530
|
+
"function": {"name": "submit_answer"},
|
|
531
|
+
}
|
|
472
532
|
sanitized["parallel_tool_calls"] = False
|
|
473
533
|
return sanitized
|
|
474
534
|
|
|
@@ -509,19 +569,21 @@ def fastapi_app():
|
|
|
509
569
|
payload = {
|
|
510
570
|
"model": model,
|
|
511
571
|
"messages": [{"role": "user", "content": user_prompt}],
|
|
512
|
-
"tools": [
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
"
|
|
516
|
-
|
|
517
|
-
"
|
|
518
|
-
|
|
519
|
-
"
|
|
572
|
+
"tools": [
|
|
573
|
+
{
|
|
574
|
+
"type": "function",
|
|
575
|
+
"function": {
|
|
576
|
+
"name": "submit_answer",
|
|
577
|
+
"parameters": {
|
|
578
|
+
"type": "object",
|
|
579
|
+
"properties": {
|
|
580
|
+
"answer": {"type": "string"},
|
|
581
|
+
},
|
|
582
|
+
"required": ["answer"],
|
|
520
583
|
},
|
|
521
|
-
"required": ["answer"],
|
|
522
584
|
},
|
|
523
|
-
}
|
|
524
|
-
|
|
585
|
+
}
|
|
586
|
+
],
|
|
525
587
|
"max_tokens": 256,
|
|
526
588
|
"temperature": 0.2,
|
|
527
589
|
}
|
|
@@ -529,7 +591,7 @@ def fastapi_app():
|
|
|
529
591
|
|
|
530
592
|
try:
|
|
531
593
|
tool_names = []
|
|
532
|
-
for t in
|
|
594
|
+
for t in payload.get("tools") or []:
|
|
533
595
|
if isinstance(t, dict):
|
|
534
596
|
fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
|
|
535
597
|
name = fn.get("name")
|
|
@@ -547,7 +609,9 @@ def fastapi_app():
|
|
|
547
609
|
if sk:
|
|
548
610
|
headers["Authorization"] = f"Bearer {sk}"
|
|
549
611
|
with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
|
|
550
|
-
resp = client.post(
|
|
612
|
+
resp = client.post(
|
|
613
|
+
f"{inference_url}/v1/chat/completions", json=to_send, headers=headers
|
|
614
|
+
)
|
|
551
615
|
try:
|
|
552
616
|
data = resp.json()
|
|
553
617
|
except Exception:
|
|
@@ -580,14 +644,21 @@ def fastapi_app():
|
|
|
580
644
|
|
|
581
645
|
tool_answer = _parse_tool_answer(data)
|
|
582
646
|
history.append({"answer": tool_answer})
|
|
583
|
-
steps.append(
|
|
584
|
-
|
|
585
|
-
|
|
586
|
-
|
|
587
|
-
|
|
588
|
-
|
|
589
|
-
|
|
590
|
-
|
|
647
|
+
steps.append(
|
|
648
|
+
{
|
|
649
|
+
"obs": {},
|
|
650
|
+
"tool_calls": [
|
|
651
|
+
{
|
|
652
|
+
"tool_name": "submit_answer",
|
|
653
|
+
"arguments": _json.dumps({"answer": tool_answer}),
|
|
654
|
+
}
|
|
655
|
+
],
|
|
656
|
+
"reward": None,
|
|
657
|
+
"done": False,
|
|
658
|
+
"truncated": False,
|
|
659
|
+
"info": None,
|
|
660
|
+
}
|
|
661
|
+
)
|
|
591
662
|
|
|
592
663
|
# Evaluate answer correctness using tool output (or fall back to assistant text)
|
|
593
664
|
reward_val = 0.0
|
|
@@ -605,25 +676,57 @@ def fastapi_app():
|
|
|
605
676
|
except Exception:
|
|
606
677
|
reward_val = 0.0
|
|
607
678
|
|
|
679
|
+
# Immediate, concise rollout logging mirroring RL format
|
|
680
|
+
try:
|
|
681
|
+
preview = tool_answer[:120] + (
|
|
682
|
+
"…" if isinstance(tool_answer, str) and len(tool_answer) > 120 else ""
|
|
683
|
+
)
|
|
684
|
+
components = {
|
|
685
|
+
"env": float(reward_val),
|
|
686
|
+
"rubric_event": 1.0 if bool(tool_answer.strip()) else 0.0,
|
|
687
|
+
"rubric_outcome": 1.0 if float(reward_val) > 0.0 else 0.0,
|
|
688
|
+
}
|
|
689
|
+
print(
|
|
690
|
+
"[MATH_ROLLOUT] run=",
|
|
691
|
+
run_id,
|
|
692
|
+
" seed=",
|
|
693
|
+
seed_val,
|
|
694
|
+
" subject=",
|
|
695
|
+
subject,
|
|
696
|
+
" tool=submit_answer answer=",
|
|
697
|
+
preview,
|
|
698
|
+
" reward=",
|
|
699
|
+
float(reward_val),
|
|
700
|
+
" components=",
|
|
701
|
+
components,
|
|
702
|
+
flush=True,
|
|
703
|
+
)
|
|
704
|
+
except Exception:
|
|
705
|
+
pass
|
|
706
|
+
|
|
608
707
|
total_reward += float(reward_val)
|
|
609
|
-
steps.append(
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
708
|
+
steps.append(
|
|
709
|
+
{
|
|
710
|
+
"obs": {},
|
|
711
|
+
"tool_calls": [],
|
|
712
|
+
"reward": reward_val,
|
|
713
|
+
"done": True,
|
|
714
|
+
"truncated": False,
|
|
715
|
+
"info": None,
|
|
716
|
+
}
|
|
717
|
+
)
|
|
617
718
|
|
|
618
719
|
return {
|
|
619
720
|
"run_id": run_id,
|
|
620
|
-
"trajectories": [
|
|
621
|
-
|
|
622
|
-
|
|
623
|
-
|
|
624
|
-
|
|
625
|
-
|
|
626
|
-
|
|
721
|
+
"trajectories": [
|
|
722
|
+
{
|
|
723
|
+
"env_id": env_name,
|
|
724
|
+
"policy_id": (policy or {}).get("policy_name") or "math-react",
|
|
725
|
+
"steps": steps,
|
|
726
|
+
"final": {"observation": {}},
|
|
727
|
+
"length": len(steps),
|
|
728
|
+
}
|
|
729
|
+
],
|
|
627
730
|
"branches": {},
|
|
628
731
|
"metrics": {
|
|
629
732
|
"episode_returns": [total_reward],
|