synth-ai 0.2.9.dev4__py3-none-any.whl → 0.2.9.dev7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- examples/common_old/backend.py +0 -1
- examples/crafter_debug_render.py +15 -6
- examples/evals_old/compare_models.py +1 -0
- examples/finetuning_old/_backup_synth_qwen/filter_traces_achievements.py +6 -2
- examples/finetuning_old/_backup_synth_qwen/react_agent_lm.py +4 -4
- examples/finetuning_old/_backup_synth_qwen/sft_kickoff.py +4 -3
- examples/finetuning_old/synth_qwen_v1/filter_traces_achievements.py +6 -2
- examples/finetuning_old/synth_qwen_v1/finetune.py +1 -1
- examples/finetuning_old/synth_qwen_v1/hello_ft_model.py +4 -4
- examples/finetuning_old/synth_qwen_v1/infer.py +1 -2
- examples/finetuning_old/synth_qwen_v1/poll.py +4 -2
- examples/finetuning_old/synth_qwen_v1/prepare_data.py +8 -8
- examples/finetuning_old/synth_qwen_v1/react_agent_lm.py +5 -4
- examples/finetuning_old/synth_qwen_v1/run_crafter_sft_job.py +11 -8
- examples/finetuning_old/synth_qwen_v1/run_ft_job.py +17 -12
- examples/finetuning_old/synth_qwen_v1/upload_data.py +1 -1
- examples/finetuning_old/synth_qwen_v1/util.py +7 -2
- examples/rl/configs/eval_base_qwen.toml +1 -1
- examples/rl/configs/rl_from_base_qwen17.toml +1 -1
- examples/rl/download_dataset.py +26 -10
- examples/rl/run_eval.py +17 -15
- examples/rl/run_rl_and_save.py +24 -7
- examples/rl/task_app/math_single_step.py +128 -11
- examples/rl/task_app/math_task_app.py +11 -3
- examples/rl_old/task_app.py +222 -53
- examples/warming_up_to_rl/analyze_trace_db.py +7 -5
- examples/warming_up_to_rl/export_trace_sft.py +141 -16
- examples/warming_up_to_rl/groq_test.py +11 -4
- examples/warming_up_to_rl/manage_secrets.py +15 -6
- examples/warming_up_to_rl/readme.md +9 -2
- examples/warming_up_to_rl/run_eval.py +108 -30
- examples/warming_up_to_rl/run_fft_and_save.py +128 -52
- examples/warming_up_to_rl/run_local_rollout.py +87 -36
- examples/warming_up_to_rl/run_local_rollout_modal.py +113 -25
- examples/warming_up_to_rl/run_local_rollout_parallel.py +80 -16
- examples/warming_up_to_rl/run_local_rollout_traced.py +125 -20
- examples/warming_up_to_rl/run_rl_and_save.py +31 -7
- examples/warming_up_to_rl/run_rollout_remote.py +37 -10
- examples/warming_up_to_rl/task_app/grpo_crafter.py +90 -27
- examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +9 -27
- examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +46 -108
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +50 -17
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +35 -21
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +8 -4
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +29 -26
- examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +17 -13
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +106 -63
- examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +82 -84
- examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +76 -59
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +1 -1
- examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +43 -49
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +5 -15
- synth_ai/__init__.py +1 -0
- synth_ai/api/train/builders.py +34 -10
- synth_ai/api/train/cli.py +172 -32
- synth_ai/api/train/config_finder.py +59 -4
- synth_ai/api/train/env_resolver.py +32 -14
- synth_ai/api/train/pollers.py +11 -3
- synth_ai/api/train/task_app.py +4 -1
- synth_ai/api/train/utils.py +20 -4
- synth_ai/cli/__init__.py +11 -4
- synth_ai/cli/balance.py +1 -1
- synth_ai/cli/demo.py +19 -5
- synth_ai/cli/rl_demo.py +75 -16
- synth_ai/cli/root.py +116 -37
- synth_ai/cli/task_apps.py +1286 -170
- synth_ai/cli/traces.py +1 -0
- synth_ai/cli/turso.py +73 -0
- synth_ai/core/experiment.py +0 -2
- synth_ai/demo_registry.py +67 -30
- synth_ai/demos/core/cli.py +493 -164
- synth_ai/demos/demo_task_apps/core.py +50 -6
- synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +2 -3
- synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +36 -28
- synth_ai/demos/demo_task_apps/math/_common.py +1 -2
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +0 -2
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +168 -65
- synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -1
- synth_ai/environments/examples/bandit/engine.py +12 -4
- synth_ai/environments/examples/bandit/taskset.py +4 -4
- synth_ai/environments/reproducibility/tree.py +3 -1
- synth_ai/environments/service/core_routes.py +6 -2
- synth_ai/evals/base.py +0 -2
- synth_ai/experimental/synth_oss.py +11 -12
- synth_ai/handshake.py +3 -1
- synth_ai/http_client.py +31 -7
- synth_ai/inference/__init__.py +0 -2
- synth_ai/inference/client.py +8 -4
- synth_ai/jobs/client.py +40 -10
- synth_ai/learning/client.py +33 -8
- synth_ai/learning/config.py +0 -2
- synth_ai/learning/constants.py +0 -2
- synth_ai/learning/ft_client.py +6 -3
- synth_ai/learning/health.py +9 -2
- synth_ai/learning/jobs.py +17 -5
- synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +1 -3
- synth_ai/learning/prompts/random_search.py +4 -1
- synth_ai/learning/prompts/run_random_search_banking77.py +6 -1
- synth_ai/learning/rl_client.py +42 -14
- synth_ai/learning/sse.py +0 -2
- synth_ai/learning/validators.py +6 -2
- synth_ai/lm/caching/ephemeral.py +1 -3
- synth_ai/lm/core/exceptions.py +0 -2
- synth_ai/lm/core/main.py +13 -1
- synth_ai/lm/core/synth_models.py +0 -1
- synth_ai/lm/core/vendor_clients.py +4 -2
- synth_ai/lm/overrides.py +2 -2
- synth_ai/lm/vendors/core/anthropic_api.py +7 -7
- synth_ai/lm/vendors/core/openai_api.py +2 -0
- synth_ai/lm/vendors/openai_standard.py +3 -1
- synth_ai/lm/vendors/openai_standard_responses.py +6 -3
- synth_ai/lm/vendors/supported/custom_endpoint.py +1 -3
- synth_ai/lm/vendors/synth_client.py +37 -10
- synth_ai/rl/__init__.py +0 -1
- synth_ai/rl/contracts.py +0 -2
- synth_ai/rl/env_keys.py +6 -1
- synth_ai/task/__init__.py +1 -0
- synth_ai/task/apps/__init__.py +11 -11
- synth_ai/task/auth.py +29 -17
- synth_ai/task/client.py +3 -1
- synth_ai/task/contracts.py +1 -0
- synth_ai/task/datasets.py +3 -1
- synth_ai/task/errors.py +3 -2
- synth_ai/task/health.py +0 -2
- synth_ai/task/json.py +0 -1
- synth_ai/task/proxy.py +2 -5
- synth_ai/task/rubrics.py +9 -3
- synth_ai/task/server.py +31 -5
- synth_ai/task/tracing_utils.py +8 -3
- synth_ai/task/validators.py +0 -1
- synth_ai/task/vendors.py +0 -1
- synth_ai/tracing_v3/db_config.py +26 -1
- synth_ai/tracing_v3/decorators.py +1 -0
- synth_ai/tracing_v3/examples/basic_usage.py +3 -2
- synth_ai/tracing_v3/hooks.py +2 -0
- synth_ai/tracing_v3/replica_sync.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +24 -3
- synth_ai/tracing_v3/storage/base.py +4 -1
- synth_ai/tracing_v3/storage/factory.py +0 -1
- synth_ai/tracing_v3/turso/manager.py +102 -38
- synth_ai/tracing_v3/turso/models.py +4 -1
- synth_ai/tracing_v3/utils.py +1 -0
- synth_ai/v0/tracing/upload.py +32 -135
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/METADATA +1 -1
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/RECORD +154 -156
- examples/warming_up_to_rl/task_app/synth_envs_hosted/test_stepwise_rewards.py +0 -58
- synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
- synth_ai/install_sqld.sh +0 -40
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.9.dev4.dist-info → synth_ai-0.2.9.dev7.dist-info}/top_level.txt +0 -0
examples/rl_old/task_app.py
CHANGED
|
@@ -15,7 +15,15 @@ if "/opt" not in _sys.path:
|
|
|
15
15
|
_sys.path.insert(0, "/opt")
|
|
16
16
|
|
|
17
17
|
# Use environment-aware names to avoid collisions across dev/prod
|
|
18
|
-
_env_flag = (
|
|
18
|
+
_env_flag = (
|
|
19
|
+
(
|
|
20
|
+
_os.getenv("SYNTH_BACKEND_URL_OVERRIDE", "")
|
|
21
|
+
or _os.getenv("ENVIRONMENT", "")
|
|
22
|
+
or _os.getenv("APP_ENVIRONMENT", "")
|
|
23
|
+
)
|
|
24
|
+
.strip()
|
|
25
|
+
.lower()
|
|
26
|
+
)
|
|
19
27
|
_is_prod = _env_flag in ("prod", "production")
|
|
20
28
|
|
|
21
29
|
# Secret name must be provided explicitly via TASK_APP_SECRET_NAME
|
|
@@ -85,7 +93,9 @@ image = (
|
|
|
85
93
|
]
|
|
86
94
|
)
|
|
87
95
|
# Bundle the crafter module into the image for imports at runtime (absolute path)
|
|
88
|
-
.add_local_dir(
|
|
96
|
+
.add_local_dir(
|
|
97
|
+
str((_HERE.parent / "crafter_task_app_helpers").resolve()), "/opt/crafter_task_app_helpers"
|
|
98
|
+
)
|
|
89
99
|
# Bundle synth_ai package to import full environment implementation.
|
|
90
100
|
# Resolve repo root robustly (examples/rl/task_app.py -> repo_root = examples/rl/../../..)
|
|
91
101
|
.add_local_dir(str((_HERE.parent.parent.parent / "synth_ai").resolve()), "/opt/synth_ai")
|
|
@@ -102,7 +112,10 @@ OPENAI_REMOVE_FIELDS = (
|
|
|
102
112
|
OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
|
|
103
113
|
OPENAI_TOOL_CHOICE_FORCED = {"type": "function", "function": {"name": "interact"}}
|
|
104
114
|
|
|
105
|
-
|
|
115
|
+
|
|
116
|
+
def prepare_inference_payload_for_model(
|
|
117
|
+
model: str | None, payload: dict[str, Any]
|
|
118
|
+
) -> dict[str, Any]:
|
|
106
119
|
"""Sanitize payload for OpenAI API.
|
|
107
120
|
|
|
108
121
|
- Always strip Synth-specific fields not supported by OpenAI (e.g., stop_after_tool_calls).
|
|
@@ -132,7 +145,13 @@ def prepare_inference_payload_for_model(model: str | None, payload: dict[str, An
|
|
|
132
145
|
out["parallel_tool_calls"] = False
|
|
133
146
|
return out
|
|
134
147
|
|
|
135
|
-
|
|
148
|
+
|
|
149
|
+
@app.function(
|
|
150
|
+
image=image,
|
|
151
|
+
secrets=[modal.Secret.from_name(MODAL_SECRET_NAME)],
|
|
152
|
+
min_containers=1,
|
|
153
|
+
max_containers=1,
|
|
154
|
+
)
|
|
136
155
|
@modal.asgi_app()
|
|
137
156
|
def fastapi_app():
|
|
138
157
|
# Import FastAPI/Pydantic inside the container runtime to avoid local import errors
|
|
@@ -144,6 +163,7 @@ def fastapi_app():
|
|
|
144
163
|
import sys
|
|
145
164
|
import os
|
|
146
165
|
import httpx
|
|
166
|
+
|
|
147
167
|
# Logger for debug output
|
|
148
168
|
logger = logging.getLogger(__name__)
|
|
149
169
|
|
|
@@ -154,6 +174,7 @@ def fastapi_app():
|
|
|
154
174
|
os.environ.setdefault("TURSO_LOCAL_DB_URL", "sqlite+aiosqlite:////tmp/synth_ai.db")
|
|
155
175
|
|
|
156
176
|
import importlib
|
|
177
|
+
|
|
157
178
|
preload_modules = [
|
|
158
179
|
# synth_ai core
|
|
159
180
|
"synth_ai",
|
|
@@ -254,10 +275,14 @@ def fastapi_app():
|
|
|
254
275
|
def health(request: Request):
|
|
255
276
|
env_key = os.environ.get("ENVIRONMENT_API_KEY")
|
|
256
277
|
if not env_key:
|
|
257
|
-
raise HTTPException(
|
|
278
|
+
raise HTTPException(
|
|
279
|
+
status_code=503,
|
|
280
|
+
detail="Auth not configured: missing ENVIRONMENT_API_KEY in task service environment",
|
|
281
|
+
)
|
|
258
282
|
# Authorize using all header variants; avoid typed Header to prevent 422s
|
|
259
283
|
try:
|
|
260
284
|
from synth_ai.task.auth import is_api_key_header_authorized
|
|
285
|
+
|
|
261
286
|
authorized = is_api_key_header_authorized(request)
|
|
262
287
|
except Exception:
|
|
263
288
|
# Fallback: check only x-api-key
|
|
@@ -275,9 +300,13 @@ def fastapi_app():
|
|
|
275
300
|
def health_rollout(request: Request):
|
|
276
301
|
expected = os.environ.get("ENVIRONMENT_API_KEY")
|
|
277
302
|
if not expected:
|
|
278
|
-
raise HTTPException(
|
|
303
|
+
raise HTTPException(
|
|
304
|
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
305
|
+
detail="Missing ENVIRONMENT_API_KEY in service env",
|
|
306
|
+
)
|
|
279
307
|
try:
|
|
280
308
|
from synth_ai.task.auth import is_api_key_header_authorized
|
|
309
|
+
|
|
281
310
|
authorized = is_api_key_header_authorized(request)
|
|
282
311
|
except Exception:
|
|
283
312
|
header_key = request.headers.get("x-api-key")
|
|
@@ -290,6 +319,7 @@ def fastapi_app():
|
|
|
290
319
|
|
|
291
320
|
# Log and surface 422 validation errors with header presence
|
|
292
321
|
from fastapi.exceptions import RequestValidationError
|
|
322
|
+
|
|
293
323
|
@api.exception_handler(RequestValidationError)
|
|
294
324
|
async def _on_validation_error(request: Request, exc: RequestValidationError):
|
|
295
325
|
try:
|
|
@@ -304,7 +334,9 @@ def fastapi_app():
|
|
|
304
334
|
print("[422] validation", snapshot, flush=True)
|
|
305
335
|
except Exception:
|
|
306
336
|
pass
|
|
307
|
-
return JSONResponse(
|
|
337
|
+
return JSONResponse(
|
|
338
|
+
status_code=422, content={"status": "invalid", "detail": exc.errors()[:5]}
|
|
339
|
+
)
|
|
308
340
|
|
|
309
341
|
@api.post(f"/env/{ENV_NAME}/initialize")
|
|
310
342
|
async def initialize(req: InitRequest, request: Request):
|
|
@@ -337,14 +369,19 @@ def fastapi_app():
|
|
|
337
369
|
def proxy_chat_completions(req: dict[str, Any]):
|
|
338
370
|
openai_key = os.environ.get("OPENAI_API_KEY")
|
|
339
371
|
if not openai_key:
|
|
340
|
-
raise HTTPException(
|
|
372
|
+
raise HTTPException(
|
|
373
|
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
374
|
+
detail="Missing OPENAI_API_KEY in task service environment",
|
|
375
|
+
)
|
|
341
376
|
# Sanitize payload for OpenAI models (e.g., gpt-5-*)
|
|
342
377
|
model = req.get("model")
|
|
343
378
|
payload = prepare_inference_payload_for_model(model, req)
|
|
344
379
|
headers = {"Authorization": f"Bearer {openai_key}"}
|
|
345
380
|
# Increase timeout for proxy calls (models may be slower)
|
|
346
381
|
with httpx.Client(timeout=120.0) as client:
|
|
347
|
-
resp = client.post(
|
|
382
|
+
resp = client.post(
|
|
383
|
+
"https://api.openai.com/v1/chat/completions", json=payload, headers=headers
|
|
384
|
+
)
|
|
348
385
|
try:
|
|
349
386
|
data = resp.json()
|
|
350
387
|
except Exception:
|
|
@@ -371,7 +408,10 @@ def fastapi_app():
|
|
|
371
408
|
expected = os.environ.get("ENVIRONMENT_API_KEY")
|
|
372
409
|
if not expected:
|
|
373
410
|
logger.error("rollout.auth.misconfigured: missing ENVIRONMENT_API_KEY")
|
|
374
|
-
raise HTTPException(
|
|
411
|
+
raise HTTPException(
|
|
412
|
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
413
|
+
detail="Auth not configured: missing ENVIRONMENT_API_KEY",
|
|
414
|
+
)
|
|
375
415
|
# Compute masked diagnostics (never log full keys)
|
|
376
416
|
try:
|
|
377
417
|
exp_len = len(expected)
|
|
@@ -385,7 +425,11 @@ def fastapi_app():
|
|
|
385
425
|
candidates = [c for c in [single, bearer, *multi] if c]
|
|
386
426
|
# Assert server sees ALL keys sent by client
|
|
387
427
|
if multi:
|
|
388
|
-
logger.info(
|
|
428
|
+
logger.info(
|
|
429
|
+
"rollout.auth.candidates: n=%s first15=%s",
|
|
430
|
+
len(candidates),
|
|
431
|
+
[c[:15] for c in candidates],
|
|
432
|
+
)
|
|
389
433
|
got_len = len(single or bearer or "")
|
|
390
434
|
got_suf = (single or bearer or "")[-5:] if got_len >= 5 else ""
|
|
391
435
|
except Exception:
|
|
@@ -398,13 +442,22 @@ def fastapi_app():
|
|
|
398
442
|
if not authorized:
|
|
399
443
|
logger.warning(
|
|
400
444
|
"rollout.auth.failed: have_any=%s expect_len=%s expect_last5=%s got_len=%s got_last5=%s",
|
|
401
|
-
bool(candidates),
|
|
445
|
+
bool(candidates),
|
|
446
|
+
exp_len,
|
|
447
|
+
exp_suf,
|
|
448
|
+
got_len,
|
|
449
|
+
got_suf,
|
|
450
|
+
)
|
|
451
|
+
raise HTTPException(
|
|
452
|
+
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key"
|
|
402
453
|
)
|
|
403
|
-
raise HTTPException(status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key")
|
|
404
454
|
else:
|
|
405
455
|
logger.info(
|
|
406
456
|
"rollout.auth.ok: expect_len=%s expect_last5=%s got_len=%s got_last5=%s",
|
|
407
|
-
exp_len,
|
|
457
|
+
exp_len,
|
|
458
|
+
exp_suf,
|
|
459
|
+
got_len,
|
|
460
|
+
got_suf,
|
|
408
461
|
)
|
|
409
462
|
|
|
410
463
|
# Extract policy config
|
|
@@ -457,11 +510,15 @@ def fastapi_app():
|
|
|
457
510
|
# Build mapping using crafter's internal ids
|
|
458
511
|
import itertools as _it
|
|
459
512
|
import crafter as _crafter
|
|
513
|
+
|
|
460
514
|
dummy = None
|
|
461
515
|
try:
|
|
462
516
|
dummy = _crafter.Env()
|
|
463
517
|
max_id = (
|
|
464
|
-
max(
|
|
518
|
+
max(
|
|
519
|
+
max(dummy._world._mat_ids.values()),
|
|
520
|
+
max(dummy._sem_view._obj_ids.values()),
|
|
521
|
+
)
|
|
465
522
|
+ 1
|
|
466
523
|
)
|
|
467
524
|
id_to_item = ["void"] * max_id
|
|
@@ -516,8 +573,14 @@ def fastapi_app():
|
|
|
516
573
|
if ach:
|
|
517
574
|
all_achievements = list(ach.keys())
|
|
518
575
|
lines.append(f"achievements_available: {', '.join(all_achievements)}")
|
|
519
|
-
lines.append(
|
|
520
|
-
|
|
576
|
+
lines.append(
|
|
577
|
+
f"achievements_unlocked: {', '.join(ach_on)}"
|
|
578
|
+
if ach_on
|
|
579
|
+
else "achievements_unlocked: "
|
|
580
|
+
)
|
|
581
|
+
lines.append(
|
|
582
|
+
f"achievements_progress: {len(ach_on)}/{len(all_achievements)}"
|
|
583
|
+
)
|
|
521
584
|
# Local surroundings (7x7) using semantic_map
|
|
522
585
|
smap = obs.get("semantic_map")
|
|
523
586
|
if smap is not None and pos is not None:
|
|
@@ -539,7 +602,11 @@ def fastapi_app():
|
|
|
539
602
|
row.append("player")
|
|
540
603
|
else:
|
|
541
604
|
idx = int(smap[x, y])
|
|
542
|
-
name =
|
|
605
|
+
name = (
|
|
606
|
+
id_to_item[idx]
|
|
607
|
+
if 0 <= idx < len(id_to_item)
|
|
608
|
+
else str(idx)
|
|
609
|
+
)
|
|
543
610
|
row.append(name)
|
|
544
611
|
matrix.append(row)
|
|
545
612
|
# Transpose to match visual orientation
|
|
@@ -554,6 +621,7 @@ def fastapi_app():
|
|
|
554
621
|
if not lines:
|
|
555
622
|
lines.append("no salient state; explore to gather context")
|
|
556
623
|
return "\n".join(lines)
|
|
624
|
+
|
|
557
625
|
# Build compact context from last few tool calls (gpt-5-nano friendly)
|
|
558
626
|
lines: list[str] = []
|
|
559
627
|
for rec in reversed(trajectory_steps):
|
|
@@ -568,10 +636,14 @@ def fastapi_app():
|
|
|
568
636
|
name = tc0.get("tool_name") or tc0.get("name") or "unknown"
|
|
569
637
|
args = tc0.get("arguments")
|
|
570
638
|
lines.append(f"- {name}: {args}")
|
|
571
|
-
context_text = "Previous tool calls (most recent first):\n" + (
|
|
639
|
+
context_text = "Previous tool calls (most recent first):\n" + (
|
|
640
|
+
"\n".join(lines) if lines else "- none"
|
|
641
|
+
)
|
|
572
642
|
obs_text = _format_obs(observation)
|
|
573
643
|
combined_text = f"Current observation:\n{obs_text}\n\n{context_text}"
|
|
574
|
-
payload = policy.build_inference_request(
|
|
644
|
+
payload = policy.build_inference_request(
|
|
645
|
+
combined_text, history=[], turn=len(trajectory_steps)
|
|
646
|
+
)
|
|
575
647
|
# Debug: print the full prompt content in a stable labeled block for grepability
|
|
576
648
|
try:
|
|
577
649
|
print("PROMPT_DUMP_BEGIN")
|
|
@@ -593,14 +665,20 @@ def fastapi_app():
|
|
|
593
665
|
except Exception:
|
|
594
666
|
pass
|
|
595
667
|
try:
|
|
596
|
-
_ach =
|
|
668
|
+
_ach = (
|
|
669
|
+
observation.get("achievements_status")
|
|
670
|
+
if isinstance(observation, dict)
|
|
671
|
+
else {}
|
|
672
|
+
)
|
|
597
673
|
_ach_on = [k for k, v in (_ach or {}).items() if v]
|
|
598
674
|
print(f"[task:crafter] achievements_unlocked: {_ach_on}", flush=True)
|
|
599
675
|
except Exception:
|
|
600
676
|
pass
|
|
677
|
+
|
|
601
678
|
# Prepare payload based on model family (OpenAI vs vLLM)
|
|
602
679
|
def _prepare_payload(p: dict, mdl: str | None) -> dict:
|
|
603
680
|
return prepare_inference_payload_for_model(mdl, p)
|
|
681
|
+
|
|
604
682
|
# Debug: payload shape
|
|
605
683
|
print(
|
|
606
684
|
"[task:crafter] inference payload: ",
|
|
@@ -619,7 +697,7 @@ def fastapi_app():
|
|
|
619
697
|
_timeouts = httpx.Timeout(connect=10.0, read=180.0, write=60.0, pool=60.0)
|
|
620
698
|
with httpx.Client(timeout=_timeouts) as client:
|
|
621
699
|
# Decide endpoint: avoid calling our own /proxy inside the same request
|
|
622
|
-
_direct =
|
|
700
|
+
_direct = "api.openai.com" in inference_url
|
|
623
701
|
if _direct:
|
|
624
702
|
# Call OpenAI directly
|
|
625
703
|
if _okey:
|
|
@@ -639,6 +717,7 @@ def fastapi_app():
|
|
|
639
717
|
# Debug: outbound request diagnostics
|
|
640
718
|
try:
|
|
641
719
|
import json as _json
|
|
720
|
+
|
|
642
721
|
_size = len(_json.dumps(to_send))
|
|
643
722
|
except Exception:
|
|
644
723
|
_size = -1
|
|
@@ -647,7 +726,12 @@ def fastapi_app():
|
|
|
647
726
|
{
|
|
648
727
|
"endpoint": f"{endpoint_base.rstrip('/')}/v1/chat/completions",
|
|
649
728
|
"direct_openai": bool(_direct),
|
|
650
|
-
"timeout": {
|
|
729
|
+
"timeout": {
|
|
730
|
+
"read": 180.0,
|
|
731
|
+
"connect": 10.0,
|
|
732
|
+
"write": 60.0,
|
|
733
|
+
"pool": 60.0,
|
|
734
|
+
},
|
|
651
735
|
"payload_bytes": _size,
|
|
652
736
|
"has_auth": bool(headers.get("Authorization")),
|
|
653
737
|
},
|
|
@@ -663,14 +747,23 @@ def fastapi_app():
|
|
|
663
747
|
)
|
|
664
748
|
except httpx.ReadTimeout as rte:
|
|
665
749
|
_elapsed = time.time() - _t0
|
|
666
|
-
print(
|
|
750
|
+
print(
|
|
751
|
+
f"[task:crafter][timeout] read timeout after {_elapsed:.1f}s: {rte}",
|
|
752
|
+
flush=True,
|
|
753
|
+
)
|
|
667
754
|
raise
|
|
668
755
|
except Exception as re:
|
|
669
756
|
_elapsed = time.time() - _t0
|
|
670
|
-
print(
|
|
757
|
+
print(
|
|
758
|
+
f"[task:crafter][error] request failed after {_elapsed:.1f}s: {type(re).__name__}: {re}",
|
|
759
|
+
flush=True,
|
|
760
|
+
)
|
|
671
761
|
raise
|
|
672
762
|
_elapsed = time.time() - _t0
|
|
673
|
-
print(
|
|
763
|
+
print(
|
|
764
|
+
f"[task:crafter] inference status= {resp.status_code} elapsed={_elapsed:.2f}s",
|
|
765
|
+
flush=True,
|
|
766
|
+
)
|
|
674
767
|
# Emit a light-weight perf snapshot for visibility
|
|
675
768
|
try:
|
|
676
769
|
print(
|
|
@@ -702,6 +795,7 @@ def fastapi_app():
|
|
|
702
795
|
# Print full tool call payloads for inspection
|
|
703
796
|
try:
|
|
704
797
|
import json as _json
|
|
798
|
+
|
|
705
799
|
for _i, _tc in enumerate(parsed):
|
|
706
800
|
try:
|
|
707
801
|
print(
|
|
@@ -717,8 +811,11 @@ def fastapi_app():
|
|
|
717
811
|
# Dump compact body preview to understand schema when no tools parsed
|
|
718
812
|
try:
|
|
719
813
|
import json as _json
|
|
720
|
-
|
|
721
|
-
|
|
814
|
+
|
|
815
|
+
preview = _json.dumps(data, separators=(",", ":"))
|
|
816
|
+
print(
|
|
817
|
+
"[task:crafter] body(no_tools) preview:", preview[:800], flush=True
|
|
818
|
+
)
|
|
722
819
|
except Exception:
|
|
723
820
|
pass
|
|
724
821
|
# Early terminate the episode to avoid hanging on empty tool calls
|
|
@@ -736,6 +833,7 @@ def fastapi_app():
|
|
|
736
833
|
if name == "interact":
|
|
737
834
|
# Parse the JSON arguments string
|
|
738
835
|
import json
|
|
836
|
+
|
|
739
837
|
args_str = tc.get("arguments", "{}")
|
|
740
838
|
try:
|
|
741
839
|
args_dict = json.loads(args_str)
|
|
@@ -743,7 +841,10 @@ def fastapi_app():
|
|
|
743
841
|
reasoning = args_dict.get("reasoning", "")
|
|
744
842
|
print(f"[task:crafter] reasoning: {reasoning}", flush=True)
|
|
745
843
|
except (json.JSONDecodeError, TypeError):
|
|
746
|
-
print(
|
|
844
|
+
print(
|
|
845
|
+
f"[task:crafter] ERROR: Failed to parse arguments: {args_str}",
|
|
846
|
+
flush=True,
|
|
847
|
+
)
|
|
747
848
|
actions = []
|
|
748
849
|
reasoning = "Parse error"
|
|
749
850
|
|
|
@@ -751,12 +852,18 @@ def fastapi_app():
|
|
|
751
852
|
# Print a compact echo of the current prompt + tool call for easier triage
|
|
752
853
|
try:
|
|
753
854
|
import json as _json
|
|
754
|
-
|
|
755
|
-
|
|
756
|
-
"
|
|
757
|
-
|
|
758
|
-
|
|
759
|
-
|
|
855
|
+
|
|
856
|
+
print(
|
|
857
|
+
"TOOLCALL_CONFIG:",
|
|
858
|
+
_json.dumps(
|
|
859
|
+
{
|
|
860
|
+
"policy": req.policy.policy_name,
|
|
861
|
+
"tools_present": True,
|
|
862
|
+
"tool_choice": "required",
|
|
863
|
+
"stop_after": 1,
|
|
864
|
+
}
|
|
865
|
+
),
|
|
866
|
+
)
|
|
760
867
|
except Exception:
|
|
761
868
|
pass
|
|
762
869
|
|
|
@@ -768,42 +875,77 @@ def fastapi_app():
|
|
|
768
875
|
total_reward += float(reward)
|
|
769
876
|
# Debug: print step outcome (compact)
|
|
770
877
|
try:
|
|
771
|
-
ok =
|
|
772
|
-
|
|
878
|
+
ok = (
|
|
879
|
+
list(observation.keys())
|
|
880
|
+
if isinstance(observation, dict)
|
|
881
|
+
else []
|
|
882
|
+
)
|
|
883
|
+
print(
|
|
884
|
+
f"[task:crafter] step => a={act} r={float(reward)} done={bool(done)} obs_keys={ok[:5]}",
|
|
885
|
+
flush=True,
|
|
886
|
+
)
|
|
773
887
|
except Exception:
|
|
774
888
|
pass
|
|
775
|
-
step = RolloutStep(
|
|
889
|
+
step = RolloutStep(
|
|
890
|
+
obs=observation,
|
|
891
|
+
tool_calls=pending_tool_calls,
|
|
892
|
+
reward=float(reward),
|
|
893
|
+
done=bool(done),
|
|
894
|
+
truncated=False,
|
|
895
|
+
info=info,
|
|
896
|
+
)
|
|
776
897
|
trajectory_steps.append(step)
|
|
777
898
|
ops_executed += 1
|
|
778
899
|
|
|
779
900
|
# Check for achievement-based termination
|
|
780
901
|
if isinstance(observation, dict):
|
|
781
|
-
current_achievements = observation.get(
|
|
902
|
+
current_achievements = observation.get(
|
|
903
|
+
"achievements_status", {}
|
|
904
|
+
)
|
|
782
905
|
# Track flips 0→1 within this decision
|
|
783
906
|
try:
|
|
784
907
|
if not isinstance(current_achievements, dict):
|
|
785
908
|
current_achievements = {}
|
|
786
909
|
if prev_ach is None:
|
|
787
|
-
prev_ach = {
|
|
910
|
+
prev_ach = {
|
|
911
|
+
k: bool(v)
|
|
912
|
+
for k, v in (current_achievements or {}).items()
|
|
913
|
+
}
|
|
788
914
|
else:
|
|
789
915
|
for name, on in (current_achievements or {}).items():
|
|
790
916
|
if bool(on) and not bool(prev_ach.get(name, False)):
|
|
791
917
|
decision_flips.add(str(name))
|
|
792
918
|
# Update prev_ach to latest snapshot
|
|
793
|
-
prev_ach = {
|
|
919
|
+
prev_ach = {
|
|
920
|
+
k: bool(v)
|
|
921
|
+
for k, v in (current_achievements or {}).items()
|
|
922
|
+
}
|
|
794
923
|
except Exception:
|
|
795
924
|
pass
|
|
796
|
-
achieved_count = sum(
|
|
925
|
+
achieved_count = sum(
|
|
926
|
+
1 for v in current_achievements.values() if v
|
|
927
|
+
)
|
|
797
928
|
total_achievements = len(current_achievements)
|
|
798
929
|
|
|
799
930
|
# Terminate if we've achieved a significant portion of available achievements
|
|
800
|
-
if total_achievements > 0 and achieved_count >= max(
|
|
801
|
-
|
|
802
|
-
|
|
931
|
+
if total_achievements > 0 and achieved_count >= max(
|
|
932
|
+
3, total_achievements // 2
|
|
933
|
+
):
|
|
934
|
+
print(
|
|
935
|
+
f"[task:crafter] achievement_termination: {achieved_count}/{total_achievements} achievements reached",
|
|
936
|
+
flush=True,
|
|
937
|
+
)
|
|
938
|
+
print(
|
|
939
|
+
f"[task:crafter] achieved: {[k for k, v in current_achievements.items() if v]}",
|
|
940
|
+
flush=True,
|
|
941
|
+
)
|
|
803
942
|
break
|
|
804
943
|
|
|
805
944
|
if done or len(trajectory_steps) >= max_steps:
|
|
806
|
-
print(
|
|
945
|
+
print(
|
|
946
|
+
f"[task:crafter] episode_end: done={bool(done)} steps={len(trajectory_steps)} total_reward={total_reward}",
|
|
947
|
+
flush=True,
|
|
948
|
+
)
|
|
807
949
|
break
|
|
808
950
|
elif name == "terminate":
|
|
809
951
|
# Handle termination
|
|
@@ -812,7 +954,14 @@ def fastapi_app():
|
|
|
812
954
|
else:
|
|
813
955
|
# Non-interact tool call: count as a step without env change
|
|
814
956
|
print("[task:crafter] non-interact tool_call:", name, flush=True)
|
|
815
|
-
step = RolloutStep(
|
|
957
|
+
step = RolloutStep(
|
|
958
|
+
obs=observation,
|
|
959
|
+
tool_calls=pending_tool_calls,
|
|
960
|
+
reward=None,
|
|
961
|
+
done=False,
|
|
962
|
+
truncated=False,
|
|
963
|
+
info=info,
|
|
964
|
+
)
|
|
816
965
|
trajectory_steps.append(step)
|
|
817
966
|
ops_executed += 1
|
|
818
967
|
# End of decision: record indicator_i for shaping
|
|
@@ -823,7 +972,10 @@ def fastapi_app():
|
|
|
823
972
|
pass
|
|
824
973
|
pending_tool_calls = None
|
|
825
974
|
if len(trajectory_steps) >= max_steps:
|
|
826
|
-
print(
|
|
975
|
+
print(
|
|
976
|
+
f"[task:crafter] max_steps_reached: steps={len(trajectory_steps)} total_reward={total_reward}",
|
|
977
|
+
flush=True,
|
|
978
|
+
)
|
|
827
979
|
break
|
|
828
980
|
else:
|
|
829
981
|
# Unknown op: skip
|
|
@@ -865,7 +1017,11 @@ def fastapi_app():
|
|
|
865
1017
|
# Step-reward shaping: compute decision-level rewards if enabled
|
|
866
1018
|
branches: dict[str, Any] = {}
|
|
867
1019
|
try:
|
|
868
|
-
sr_cfg = (
|
|
1020
|
+
sr_cfg = (
|
|
1021
|
+
(req.record.config or {}).get("step_rewards")
|
|
1022
|
+
if isinstance(req.record, RolloutRecordConfig)
|
|
1023
|
+
else None
|
|
1024
|
+
)
|
|
869
1025
|
except Exception:
|
|
870
1026
|
sr_cfg = None
|
|
871
1027
|
try:
|
|
@@ -880,6 +1036,7 @@ def fastapi_app():
|
|
|
880
1036
|
indicator_lambda = float(sr_cfg.get("indicator_lambda", 0.0))
|
|
881
1037
|
# Env overrides
|
|
882
1038
|
import os as _os2
|
|
1039
|
+
|
|
883
1040
|
if _os2.getenv("STEP_BETA"):
|
|
884
1041
|
step_beta = float(_os2.getenv("STEP_BETA"))
|
|
885
1042
|
if _os2.getenv("STEP_LAMBDA"):
|
|
@@ -909,15 +1066,19 @@ def fastapi_app():
|
|
|
909
1066
|
# Optional tracing of episode/rewards (gated)
|
|
910
1067
|
try:
|
|
911
1068
|
import os as _os3
|
|
1069
|
+
|
|
912
1070
|
if _os3.getenv("TRACE_RL", "0") == "1":
|
|
913
1071
|
from synth_ai.tracing_v3.session_tracer import SessionTracer # type: ignore
|
|
1072
|
+
|
|
914
1073
|
tracer = SessionTracer()
|
|
915
1074
|
await tracer.initialize()
|
|
916
1075
|
meta = {
|
|
917
1076
|
"env": req.env.env_name,
|
|
918
1077
|
"policy": req.policy.policy_name,
|
|
919
1078
|
"step_rewards": {
|
|
920
|
-
"enabled": bool(sr_cfg.get("enabled", False))
|
|
1079
|
+
"enabled": bool(sr_cfg.get("enabled", False))
|
|
1080
|
+
if isinstance(sr_cfg, dict)
|
|
1081
|
+
else False,
|
|
921
1082
|
"mode": (sr_cfg.get("mode") if isinstance(sr_cfg, dict) else None),
|
|
922
1083
|
},
|
|
923
1084
|
}
|
|
@@ -938,7 +1099,10 @@ def fastapi_app():
|
|
|
938
1099
|
num_episodes=1,
|
|
939
1100
|
)
|
|
940
1101
|
# Debug: print reward and achievement metrics
|
|
941
|
-
print(
|
|
1102
|
+
print(
|
|
1103
|
+
f"[task:crafter] Rollout metrics: total_reward={total_reward}, total_achievements={total_achievements}, mean_return={metrics.mean_return}, episode_returns={metrics.episode_returns}",
|
|
1104
|
+
flush=True,
|
|
1105
|
+
)
|
|
942
1106
|
return RolloutResponse(
|
|
943
1107
|
run_id=req.run_id,
|
|
944
1108
|
trajectories=[trajectory],
|
|
@@ -952,11 +1116,16 @@ def fastapi_app():
|
|
|
952
1116
|
def test_auth(request: Request):
|
|
953
1117
|
expected = os.environ.get("ENVIRONMENT_API_KEY")
|
|
954
1118
|
if not expected:
|
|
955
|
-
raise HTTPException(
|
|
1119
|
+
raise HTTPException(
|
|
1120
|
+
status_code=status.HTTP_503_SERVICE_UNAVAILABLE,
|
|
1121
|
+
detail="Missing ENVIRONMENT_API_KEY in service env",
|
|
1122
|
+
)
|
|
956
1123
|
header_key = request.headers.get("x-api-key") or request.headers.get("X-API-Key")
|
|
957
1124
|
ok = bool(header_key) and (header_key == expected)
|
|
958
1125
|
if not ok:
|
|
959
|
-
raise HTTPException(
|
|
1126
|
+
raise HTTPException(
|
|
1127
|
+
status_code=status.HTTP_401_UNAUTHORIZED, detail="Invalid or missing API key"
|
|
1128
|
+
)
|
|
960
1129
|
return {"ok": True}
|
|
961
1130
|
|
|
962
1131
|
return api
|
|
@@ -229,7 +229,9 @@ def format_model_stats(stats: list[dict[str, Any]]) -> str:
|
|
|
229
229
|
if not stats:
|
|
230
230
|
return "No model usage recorded."
|
|
231
231
|
lines = ["Model usage (by LLM calls):"]
|
|
232
|
-
header =
|
|
232
|
+
header = (
|
|
233
|
+
f"{'Model':30} {'Provider':10} {'Calls':>7} {'Tokens (in/out)':>20} {'Avg latency ms':>15}"
|
|
234
|
+
)
|
|
233
235
|
lines.append(header)
|
|
234
236
|
lines.append("-" * len(header))
|
|
235
237
|
for item in stats:
|
|
@@ -243,9 +245,7 @@ def format_model_stats(stats: list[dict[str, Any]]) -> str:
|
|
|
243
245
|
return "\n".join(lines)
|
|
244
246
|
|
|
245
247
|
|
|
246
|
-
def format_achievement_summary(
|
|
247
|
-
name_counts: Counter, size_counts: Counter
|
|
248
|
-
) -> str:
|
|
248
|
+
def format_achievement_summary(name_counts: Counter, size_counts: Counter) -> str:
|
|
249
249
|
lines = ["Unique achievements unlocked:"]
|
|
250
250
|
if name_counts:
|
|
251
251
|
top = name_counts.most_common()
|
|
@@ -349,7 +349,9 @@ def format_model_achievement_stats(model_stats: dict[str, dict[str, Any]]) -> st
|
|
|
349
349
|
return "Achievement stats by model:\n (no model sessions recorded)"
|
|
350
350
|
|
|
351
351
|
lines = ["Achievement stats by model:"]
|
|
352
|
-
for model_name in sorted(
|
|
352
|
+
for model_name in sorted(
|
|
353
|
+
model_stats.keys(), key=lambda m: model_stats[m]["sessions"], reverse=True
|
|
354
|
+
):
|
|
353
355
|
stats = model_stats[model_name]
|
|
354
356
|
providers = ", ".join(sorted(stats["providers"])) if stats["providers"] else "-"
|
|
355
357
|
sessions = stats["sessions"]
|