synth-ai 0.2.6.dev1__py3-none-any.whl → 0.2.6.dev3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/cli/rl_demo.py +24 -5
- synth_ai/demos/core/cli.py +443 -40
- synth_ai/demos/demo_task_apps/math/_common.py +17 -0
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +415 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +23 -9
- synth_ai/environments/service/app.py +13 -6
- synth_ai/http.py +26 -102
- synth_ai/http_client.py +104 -0
- synth_ai/lm/core/synth_models.py +2 -2
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.2.6.dev3.dist-info}/METADATA +1 -1
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.2.6.dev3.dist-info}/RECORD +15 -12
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.2.6.dev3.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.2.6.dev3.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.2.6.dev3.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.6.dev1.dist-info → synth_ai-0.2.6.dev3.dist-info}/top_level.txt +0 -0
synth_ai/demos/core/cli.py
CHANGED
|
@@ -6,11 +6,23 @@ import os
|
|
|
6
6
|
import sys
|
|
7
7
|
import time
|
|
8
8
|
from typing import Any, Dict, Callable
|
|
9
|
+
import shutil
|
|
10
|
+
import stat
|
|
9
11
|
|
|
10
12
|
from synth_ai.demos.demo_task_apps import core as demo_core
|
|
11
13
|
from synth_ai.demos.demo_task_apps.core import DemoEnv
|
|
12
14
|
|
|
13
15
|
|
|
16
|
+
def _is_modal_public_url(u: str) -> bool:
|
|
17
|
+
try:
|
|
18
|
+
s = (u or "").strip().lower()
|
|
19
|
+
if not (s.startswith("http://") or s.startswith("https://")):
|
|
20
|
+
return False
|
|
21
|
+
return (".modal.run" in s) and ("modal.local" not in s) and ("pypi-mirror" not in s)
|
|
22
|
+
except Exception:
|
|
23
|
+
return False
|
|
24
|
+
|
|
25
|
+
|
|
14
26
|
def cmd_check(_args: argparse.Namespace) -> int:
|
|
15
27
|
env = demo_core.load_env()
|
|
16
28
|
cwd_env_path = os.path.join(os.getcwd(), ".env")
|
|
@@ -21,6 +33,15 @@ def cmd_check(_args: argparse.Namespace) -> int:
|
|
|
21
33
|
env = demo_core.load_env()
|
|
22
34
|
local_env = demo_core.load_dotenv_file(cwd_env_path)
|
|
23
35
|
|
|
36
|
+
def _is_modal_public_url(u: str) -> bool:
|
|
37
|
+
try:
|
|
38
|
+
s = (u or "").strip().lower()
|
|
39
|
+
if not (s.startswith("http://") or s.startswith("https://")):
|
|
40
|
+
return False
|
|
41
|
+
return (".modal.run" in s) and ("modal.local" not in s) and ("pypi-mirror" not in s)
|
|
42
|
+
except Exception:
|
|
43
|
+
return False
|
|
44
|
+
|
|
24
45
|
def _maybe_fix_task_url() -> None:
|
|
25
46
|
if not env.task_app_name:
|
|
26
47
|
return
|
|
@@ -28,9 +49,7 @@ def cmd_check(_args: argparse.Namespace) -> int:
|
|
|
28
49
|
needs_lookup = False
|
|
29
50
|
if not current:
|
|
30
51
|
needs_lookup = True
|
|
31
|
-
elif not
|
|
32
|
-
needs_lookup = True
|
|
33
|
-
elif not current.startswith("http://") and not current.startswith("https://"):
|
|
52
|
+
elif not _is_modal_public_url(current):
|
|
34
53
|
needs_lookup = True
|
|
35
54
|
if not needs_lookup:
|
|
36
55
|
return
|
|
@@ -48,7 +67,7 @@ def cmd_check(_args: argparse.Namespace) -> int:
|
|
|
48
67
|
return
|
|
49
68
|
new_url = ""
|
|
50
69
|
for token in out.split():
|
|
51
|
-
if
|
|
70
|
+
if _is_modal_public_url(token):
|
|
52
71
|
new_url = token.strip().rstrip("/")
|
|
53
72
|
break
|
|
54
73
|
if new_url and new_url != current:
|
|
@@ -166,6 +185,45 @@ def _popen_stream(cmd: list[str], cwd: str | None = None, env: dict | None = Non
|
|
|
166
185
|
return int(proc.returncode or 0)
|
|
167
186
|
|
|
168
187
|
|
|
188
|
+
def _popen_stream_capture(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> tuple[int, str]:
|
|
189
|
+
"""Stream subprocess output to stdout and also capture it into a buffer."""
|
|
190
|
+
import subprocess
|
|
191
|
+
import threading
|
|
192
|
+
|
|
193
|
+
buf_lines: list[str] = []
|
|
194
|
+
try:
|
|
195
|
+
proc = subprocess.Popen(
|
|
196
|
+
cmd,
|
|
197
|
+
cwd=cwd,
|
|
198
|
+
env=env,
|
|
199
|
+
stdout=subprocess.PIPE,
|
|
200
|
+
stderr=subprocess.STDOUT,
|
|
201
|
+
text=True,
|
|
202
|
+
bufsize=1,
|
|
203
|
+
)
|
|
204
|
+
except Exception as exc:
|
|
205
|
+
print(f"Failed to launch {' '.join(cmd)}: {exc}")
|
|
206
|
+
return 1, ""
|
|
207
|
+
|
|
208
|
+
def _pump(stdout) -> None:
|
|
209
|
+
try:
|
|
210
|
+
for line in stdout:
|
|
211
|
+
line = line.rstrip()
|
|
212
|
+
print(line)
|
|
213
|
+
buf_lines.append(line)
|
|
214
|
+
except Exception:
|
|
215
|
+
pass
|
|
216
|
+
|
|
217
|
+
if proc.stdout is not None:
|
|
218
|
+
t = threading.Thread(target=_pump, args=(proc.stdout,), daemon=True)
|
|
219
|
+
t.start()
|
|
220
|
+
proc.wait()
|
|
221
|
+
t.join(timeout=1.0)
|
|
222
|
+
else:
|
|
223
|
+
proc.wait()
|
|
224
|
+
return int(proc.returncode or 0), "\n".join(buf_lines)
|
|
225
|
+
|
|
226
|
+
|
|
169
227
|
def cmd_deploy(args: argparse.Namespace) -> int:
|
|
170
228
|
env = demo_core.load_env()
|
|
171
229
|
url = ""
|
|
@@ -187,9 +245,12 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
187
245
|
# Auto-detect app path if not supplied; prompt for name and confirmation.
|
|
188
246
|
app_path = os.path.abspath(args.app) if args.app else None
|
|
189
247
|
if not app_path or not os.path.isfile(app_path):
|
|
248
|
+
# Prefer the synth_demo/ app seeded by `rl_demo init` over any root-level files
|
|
190
249
|
candidates = [
|
|
250
|
+
os.path.abspath(os.path.join(os.getcwd(), "synth_demo", "task_app.py")),
|
|
251
|
+
os.path.abspath(os.path.join(os.getcwd(), "task_app.py")),
|
|
252
|
+
os.path.abspath(os.path.join(os.getcwd(), "app.py")),
|
|
191
253
|
os.path.abspath(os.path.join(os.getcwd(), "math_task_app.py")),
|
|
192
|
-
"/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_task_app.py",
|
|
193
254
|
]
|
|
194
255
|
app_path = next((p for p in candidates if os.path.isfile(p)), None)
|
|
195
256
|
if not app_path and args.script:
|
|
@@ -200,12 +261,14 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
200
261
|
app_name = args.name
|
|
201
262
|
else:
|
|
202
263
|
if not app_path:
|
|
203
|
-
entered = input("Path to Modal app.py (e.g.,
|
|
264
|
+
entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
|
|
204
265
|
if not entered:
|
|
205
266
|
raise FileNotFoundError("No app.py path provided and auto-detect failed")
|
|
206
267
|
app_path = os.path.abspath(entered)
|
|
207
268
|
if not os.path.isfile(app_path):
|
|
208
269
|
raise FileNotFoundError(f"App file not found: {app_path}")
|
|
270
|
+
# Surface the app path before asking for the name
|
|
271
|
+
print(f"Using task app: {app_path}")
|
|
209
272
|
suggested_name = args.name or f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
|
|
210
273
|
name_in = input(f"Modal app name [{suggested_name}]: ").strip() or suggested_name
|
|
211
274
|
app_name = name_in
|
|
@@ -217,22 +280,44 @@ def cmd_deploy(args: argparse.Namespace) -> int:
|
|
|
217
280
|
print("Aborted by user.")
|
|
218
281
|
return 1
|
|
219
282
|
deploy_cmd = ["uv", "run", "python", "-m", "modal", "deploy", "--name", name_in, app_path]
|
|
220
|
-
|
|
221
|
-
|
|
283
|
+
print("\nStreaming Modal build/deploy logs (this can take several minutes on first run)…\n")
|
|
284
|
+
code, deploy_logs = _popen_stream_capture(deploy_cmd)
|
|
222
285
|
if code != 0:
|
|
223
286
|
raise RuntimeError(f"modal deploy failed (exit {code})")
|
|
287
|
+
# Try to parse URL directly from streamed logs
|
|
288
|
+
if not url:
|
|
289
|
+
try:
|
|
290
|
+
import re as _re
|
|
291
|
+
m_all = _re.findall(r"https?://[^\s]+\.modal\.run", deploy_logs or "")
|
|
292
|
+
if m_all:
|
|
293
|
+
url = m_all[-1].strip().rstrip("/")
|
|
294
|
+
except Exception:
|
|
295
|
+
pass
|
|
224
296
|
url_cmd = ["uv", "run", "python", "-m", "modal", "app", "url", name_in]
|
|
225
297
|
code2, out2 = _popen_capture(url_cmd)
|
|
226
298
|
if code2 == 0:
|
|
227
299
|
for token in out2.split():
|
|
228
|
-
if
|
|
300
|
+
if _is_modal_public_url(token):
|
|
229
301
|
url = token.strip().rstrip("/")
|
|
230
302
|
break
|
|
303
|
+
# Fallback: try reading recent Modal logs for the app to find a URL line
|
|
231
304
|
if not url:
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
305
|
+
code3, out3 = _popen_capture(["uv", "run", "python", "-m", "modal", "app", "list"])
|
|
306
|
+
if code3 == 0 and out3:
|
|
307
|
+
for line in out3.splitlines():
|
|
308
|
+
if name_in in line:
|
|
309
|
+
for token in line.split():
|
|
310
|
+
if _is_modal_public_url(token):
|
|
311
|
+
url = token.strip().rstrip("/")
|
|
312
|
+
break
|
|
313
|
+
if url:
|
|
314
|
+
break
|
|
315
|
+
# Prompt user if still no valid URL
|
|
316
|
+
if not url:
|
|
317
|
+
print("\nCould not auto-detect a public Modal URL for the app.")
|
|
318
|
+
entered = input("Enter the Modal public URL (must contain '.modal.run'), or press Enter to abort: ").strip()
|
|
319
|
+
if entered and _is_modal_public_url(entered):
|
|
320
|
+
url = entered.rstrip("/")
|
|
236
321
|
if not url:
|
|
237
322
|
raise RuntimeError("Failed to resolve public URL from modal CLI output")
|
|
238
323
|
if not url:
|
|
@@ -286,9 +371,34 @@ def cmd_configure(args: argparse.Namespace) -> int:
|
|
|
286
371
|
demo_core.persist_dotenv_values({"ENVIRONMENT_API_KEY": env_key})
|
|
287
372
|
|
|
288
373
|
task_url = env.task_app_base_url
|
|
289
|
-
if not task_url:
|
|
290
|
-
|
|
291
|
-
|
|
374
|
+
if not task_url or not _is_modal_public_url(task_url):
|
|
375
|
+
# If we have an app name, offer to resolve from Modal first
|
|
376
|
+
resolved = ""
|
|
377
|
+
if env.task_app_name:
|
|
378
|
+
try:
|
|
379
|
+
choice = input(f"Resolve URL from Modal for app '{env.task_app_name}'? [Y/n]: ").strip().lower() or "y"
|
|
380
|
+
if choice.startswith("y"):
|
|
381
|
+
code, out = _popen_capture([
|
|
382
|
+
"uv", "run", "python", "-m", "modal", "app", "url", env.task_app_name
|
|
383
|
+
])
|
|
384
|
+
if code == 0 and out:
|
|
385
|
+
for tok in out.split():
|
|
386
|
+
if _is_modal_public_url(tok):
|
|
387
|
+
resolved = tok.strip().rstrip("/")
|
|
388
|
+
break
|
|
389
|
+
except Exception:
|
|
390
|
+
resolved = ""
|
|
391
|
+
if not resolved:
|
|
392
|
+
print("Task app URL not configured or not a valid Modal public URL.")
|
|
393
|
+
print("Examples: https://<app-name>-fastapi-app.modal.run")
|
|
394
|
+
entered = input("Enter Task App base URL (must contain '.modal.run'), or press Enter to abort: ").strip()
|
|
395
|
+
if not entered or not _is_modal_public_url(entered):
|
|
396
|
+
print("Valid Task App URL is required. Run: uvx synth-ai rl_demo deploy")
|
|
397
|
+
return 1
|
|
398
|
+
task_url = entered.rstrip("/")
|
|
399
|
+
else:
|
|
400
|
+
task_url = resolved
|
|
401
|
+
demo_core.persist_task_url(task_url, name=(env.task_app_name or None))
|
|
292
402
|
|
|
293
403
|
app_name = env.task_app_name.strip()
|
|
294
404
|
if not app_name:
|
|
@@ -317,20 +427,56 @@ def cmd_configure(args: argparse.Namespace) -> int:
|
|
|
317
427
|
secret_args.append(f"SYNTH_API_KEY={synth_for_secret}")
|
|
318
428
|
|
|
319
429
|
create_cmd = ["uv", "run", "modal", "secret", "create", secret_name, *secret_args]
|
|
320
|
-
|
|
430
|
+
def _mask_args(args: list[str]) -> list[str]:
|
|
431
|
+
masked: list[str] = []
|
|
432
|
+
for a in args:
|
|
433
|
+
if "=" in a and any(a.startswith(k + "=") for k in ("ENVIRONMENT_API_KEY", "OPENAI_API_KEY", "SYNTH_API_KEY")):
|
|
434
|
+
try:
|
|
435
|
+
k, v = a.split("=", 1)
|
|
436
|
+
suf = v[-5:] if len(v) >= 5 else ""
|
|
437
|
+
masked.append(f"{k}=***{suf}")
|
|
438
|
+
except Exception:
|
|
439
|
+
masked.append("<masked>")
|
|
440
|
+
else:
|
|
441
|
+
masked.append(a)
|
|
442
|
+
return masked
|
|
443
|
+
|
|
444
|
+
print("\n[configure] Creating Modal secret (streaming logs)…")
|
|
445
|
+
print("[configure] Command:", " ".join(_mask_args(create_cmd)))
|
|
446
|
+
code = _popen_stream(create_cmd)
|
|
321
447
|
if code != 0:
|
|
322
|
-
print(
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
448
|
+
print("[configure] Secret create failed; attempting delete → create")
|
|
449
|
+
delete_cmd = ["bash", "-lc", f"printf 'y\\n' | uv run modal secret delete {secret_name}"]
|
|
450
|
+
print("[configure] Command:", " ".join(delete_cmd))
|
|
451
|
+
_popen_stream(delete_cmd)
|
|
452
|
+
print("[configure] Retrying secret create…")
|
|
453
|
+
print("[configure] Command:", " ".join(_mask_args(create_cmd)))
|
|
454
|
+
code = _popen_stream(create_cmd)
|
|
326
455
|
if code != 0:
|
|
327
|
-
print(
|
|
328
|
-
print("Failed to provision Modal secret.")
|
|
456
|
+
print("[configure] Failed to provision Modal secret.")
|
|
329
457
|
return 2
|
|
330
458
|
|
|
331
459
|
# Verify task app can read the secret by hitting rollout health with X-API-Key.
|
|
332
460
|
rollout_url = task_url.rstrip("/") + "/health/rollout"
|
|
333
|
-
|
|
461
|
+
print("[configure] Verifying rollout health:")
|
|
462
|
+
# Prefer rollout-specific health first (auth-aware), then plain /health
|
|
463
|
+
health_base = task_url.rstrip("/")
|
|
464
|
+
health_urls = [f"{health_base}/health/rollout", f"{health_base}/health"]
|
|
465
|
+
rc = 0
|
|
466
|
+
body = ""
|
|
467
|
+
for h in health_urls:
|
|
468
|
+
print("[configure] GET", h)
|
|
469
|
+
rc, body = _http("GET", h, headers={"X-API-Key": env_key})
|
|
470
|
+
if rc == 200:
|
|
471
|
+
rollout_url = h
|
|
472
|
+
break
|
|
473
|
+
print("[configure] status:", rc)
|
|
474
|
+
try:
|
|
475
|
+
import json as _json
|
|
476
|
+
preview = _json.dumps(body)[:800] if isinstance(body, dict) else str(body)[:800]
|
|
477
|
+
except Exception:
|
|
478
|
+
preview = str(body)[:800]
|
|
479
|
+
print("[configure] body:", preview)
|
|
334
480
|
if rc != 200:
|
|
335
481
|
print(f"Warning: rollout health check failed ({rc}). Response: {body}")
|
|
336
482
|
else:
|
|
@@ -341,22 +487,26 @@ def cmd_configure(args: argparse.Namespace) -> int:
|
|
|
341
487
|
env.task_app_name = app_name
|
|
342
488
|
env.task_app_secret_name = secret_name
|
|
343
489
|
|
|
344
|
-
#
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
490
|
+
# Prefer the seeded CWD config if present; otherwise fall back to packaged default
|
|
491
|
+
seeded_cfg = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
|
|
492
|
+
if os.path.isfile(seeded_cfg):
|
|
493
|
+
base_path = seeded_cfg
|
|
494
|
+
else:
|
|
495
|
+
defaults = [
|
|
496
|
+
os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
|
|
497
|
+
]
|
|
498
|
+
mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
|
|
499
|
+
if os.path.isfile(mono):
|
|
500
|
+
defaults.append(mono)
|
|
501
|
+
print("Select a baseline TOML:")
|
|
502
|
+
for i, p in enumerate(defaults, 1):
|
|
503
|
+
print(f" [{i}] {p}")
|
|
504
|
+
choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
|
|
505
|
+
try:
|
|
506
|
+
idx = max(1, min(int(choice), len(defaults))) - 1
|
|
507
|
+
except Exception:
|
|
508
|
+
idx = 0
|
|
509
|
+
base_path = defaults[idx]
|
|
360
510
|
with open(base_path, "r") as fh:
|
|
361
511
|
text = fh.read()
|
|
362
512
|
import re
|
|
@@ -413,6 +563,131 @@ def cmd_configure(args: argparse.Namespace) -> int:
|
|
|
413
563
|
return 0
|
|
414
564
|
|
|
415
565
|
|
|
566
|
+
def cmd_init(args: argparse.Namespace) -> int:
|
|
567
|
+
"""Initialize a Modal-ready Math Task App in the current directory.
|
|
568
|
+
|
|
569
|
+
Copies `examples/rl/task_app.py` and `examples/rl/deploy_task_app.sh` into CWD.
|
|
570
|
+
Creates a `.env` with placeholders if it does not exist.
|
|
571
|
+
"""
|
|
572
|
+
try:
|
|
573
|
+
# Ensure `modal` is installed for deployment flows
|
|
574
|
+
def _has_modal() -> bool:
|
|
575
|
+
try:
|
|
576
|
+
import importlib.util as _iu
|
|
577
|
+
return _iu.find_spec("modal") is not None
|
|
578
|
+
except Exception:
|
|
579
|
+
return False
|
|
580
|
+
|
|
581
|
+
if not _has_modal():
|
|
582
|
+
print("modal not found; installing…")
|
|
583
|
+
# Prefer uv if available; otherwise fallback to pip
|
|
584
|
+
try:
|
|
585
|
+
if shutil.which("uv"):
|
|
586
|
+
code, out = _popen_capture(["uv", "pip", "install", "modal>=1.1.4"])
|
|
587
|
+
else:
|
|
588
|
+
code, out = _popen_capture([sys.executable, "-m", "pip", "install", "modal>=1.1.4"])
|
|
589
|
+
if code != 0:
|
|
590
|
+
print(out)
|
|
591
|
+
print("Failed to install modal; continuing may fail.")
|
|
592
|
+
else:
|
|
593
|
+
print("modal installed successfully.")
|
|
594
|
+
except Exception as e:
|
|
595
|
+
print(f"modal install error: {e}")
|
|
596
|
+
# Re-check
|
|
597
|
+
if not _has_modal():
|
|
598
|
+
print("Warning: modal is still not importable after install attempt.")
|
|
599
|
+
else:
|
|
600
|
+
print("modal found")
|
|
601
|
+
|
|
602
|
+
here = os.getcwd()
|
|
603
|
+
demo_dir = os.path.join(here, "synth_demo")
|
|
604
|
+
os.makedirs(demo_dir, exist_ok=True)
|
|
605
|
+
# Paths inside synth_demo/
|
|
606
|
+
dst_task_py = os.path.join(demo_dir, "task_app.py")
|
|
607
|
+
dst_deploy = os.path.join(demo_dir, "deploy_task_app.sh")
|
|
608
|
+
env_path = os.path.join(demo_dir, ".env")
|
|
609
|
+
dst_cfg = os.path.join(demo_dir, "demo_config.toml")
|
|
610
|
+
|
|
611
|
+
# Copy packaged math modal task app into synth_demo/task_app.py
|
|
612
|
+
src_modal = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "modal_task_app.py"))
|
|
613
|
+
if not os.path.isfile(src_modal):
|
|
614
|
+
print("Init failed: packaged math modal task app not found.")
|
|
615
|
+
print(f"Looked for: {src_modal}")
|
|
616
|
+
return 1
|
|
617
|
+
if os.path.exists(dst_task_py) and not getattr(args, "force", False):
|
|
618
|
+
print(f"Refusing to overwrite existing file: {dst_task_py} (use --force)")
|
|
619
|
+
return 1
|
|
620
|
+
shutil.copy2(src_modal, dst_task_py)
|
|
621
|
+
|
|
622
|
+
# Create deploy script in synth_demo/
|
|
623
|
+
deploy_text = """#!/usr/bin/env bash
|
|
624
|
+
set -euo pipefail
|
|
625
|
+
|
|
626
|
+
HERE=$(cd "$(dirname "$0")" && pwd)
|
|
627
|
+
APP="$HERE/task_app.py"
|
|
628
|
+
if [ -f "$HERE/.env" ]; then
|
|
629
|
+
# shellcheck disable=SC2046
|
|
630
|
+
export $(grep -v '^#' "$HERE/.env" | xargs -I{} echo {})
|
|
631
|
+
fi
|
|
632
|
+
uv run modal deploy "$APP" | tee "$HERE/.last_deploy.log"
|
|
633
|
+
URL=$(grep -Eo 'https://[^ ]+\.modal\.run' "$HERE/.last_deploy.log" | tail -1 || true)
|
|
634
|
+
if [ -n "$URL" ]; then
|
|
635
|
+
if grep -q '^TASK_APP_BASE_URL=' "$HERE/.env" 2>/dev/null; then
|
|
636
|
+
sed -i.bak "s#^TASK_APP_BASE_URL=.*#TASK_APP_BASE_URL=$URL#" "$HERE/.env" || true
|
|
637
|
+
else
|
|
638
|
+
echo "TASK_APP_BASE_URL=$URL" >> "$HERE/.env"
|
|
639
|
+
fi
|
|
640
|
+
echo "Saved TASK_APP_BASE_URL to $HERE/.env"
|
|
641
|
+
fi
|
|
642
|
+
"""
|
|
643
|
+
_write_text(dst_deploy, deploy_text)
|
|
644
|
+
try:
|
|
645
|
+
st = os.stat(dst_deploy)
|
|
646
|
+
os.chmod(dst_deploy, st.st_mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
|
|
647
|
+
except Exception:
|
|
648
|
+
pass
|
|
649
|
+
|
|
650
|
+
# Seed .env if not present
|
|
651
|
+
if not os.path.exists(env_path):
|
|
652
|
+
_write_text(env_path, "\n".join([
|
|
653
|
+
"# Required for task app auth to environment service",
|
|
654
|
+
"ENVIRONMENT_API_KEY=",
|
|
655
|
+
"",
|
|
656
|
+
"# Optional: for CLI job submission and proxying OpenAI models",
|
|
657
|
+
"SYNTH_API_KEY=",
|
|
658
|
+
"OPENAI_API_KEY=",
|
|
659
|
+
"",
|
|
660
|
+
"# Optional: set to 'prod' to use production names",
|
|
661
|
+
"ENVIRONMENT=",
|
|
662
|
+
]) + "\n")
|
|
663
|
+
|
|
664
|
+
# Seed demo_config.toml from packaged default if not present (or overwrite with --force)
|
|
665
|
+
packaged_cfg = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml"))
|
|
666
|
+
try:
|
|
667
|
+
if os.path.isfile(packaged_cfg):
|
|
668
|
+
if not os.path.exists(dst_cfg) or getattr(args, "force", False):
|
|
669
|
+
shutil.copy2(packaged_cfg, dst_cfg)
|
|
670
|
+
except Exception:
|
|
671
|
+
pass
|
|
672
|
+
|
|
673
|
+
print("Initialized Math Task App in synth_demo/:")
|
|
674
|
+
print(f" - {dst_task_py}")
|
|
675
|
+
print(f" - {dst_deploy}")
|
|
676
|
+
print(f" - {env_path} (created if missing)")
|
|
677
|
+
if os.path.exists(dst_cfg):
|
|
678
|
+
print(f" - {dst_cfg} (seeded)")
|
|
679
|
+
print("")
|
|
680
|
+
print("Next steps:")
|
|
681
|
+
print(" 1) cd synth_demo && put your ENVIRONMENT_API_KEY in ./.env")
|
|
682
|
+
print(" 2) Deploy to Modal:")
|
|
683
|
+
print(" uvx bash ./deploy_task_app.sh")
|
|
684
|
+
print(" 3) From project root, run: uvx synth-ai rl_demo configure; uvx synth-ai rl_demo run")
|
|
685
|
+
return 0
|
|
686
|
+
except Exception as e:
|
|
687
|
+
print(f"Init error: {e}")
|
|
688
|
+
return 2
|
|
689
|
+
|
|
690
|
+
|
|
416
691
|
def _http(method: str, url: str, headers: Dict[str, str] | None = None, body: Dict[str, Any] | None = None) -> tuple[int, Dict[str, Any] | str]:
|
|
417
692
|
import urllib.request, urllib.error, json as _json
|
|
418
693
|
data = None
|
|
@@ -493,6 +768,19 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
493
768
|
code = _popen_stream(cmd, env=run_env)
|
|
494
769
|
if code != 0:
|
|
495
770
|
print(f"Clustered runner exited with code {code}")
|
|
771
|
+
# Actionable guidance for common auth issues
|
|
772
|
+
try:
|
|
773
|
+
base_url = backend_base.rstrip("/") + "/api"
|
|
774
|
+
except Exception:
|
|
775
|
+
base_url = backend_base
|
|
776
|
+
sk = (env.synth_api_key or "").strip()
|
|
777
|
+
ek = (env.env_api_key or "").strip()
|
|
778
|
+
print("Hint: If backend responded 401, verify SYNTH_API_KEY for:", base_url)
|
|
779
|
+
if sk:
|
|
780
|
+
print(f" SYNTH_API_KEY len={len(sk)} last5={sk[-5:]}")
|
|
781
|
+
if ek:
|
|
782
|
+
print(f" ENVIRONMENT_API_KEY len={len(ek)} last5={ek[-5:]}")
|
|
783
|
+
print("Also ensure your Modal secret contains ENVIRONMENT_API_KEY and matches the task app.")
|
|
496
784
|
return code
|
|
497
785
|
|
|
498
786
|
# Fallback: legacy jobs API flow
|
|
@@ -591,6 +879,17 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
591
879
|
except Exception:
|
|
592
880
|
print(str(js))
|
|
593
881
|
print("Request body was:\n" + json.dumps(body, indent=2))
|
|
882
|
+
# Extra hints for auth failures
|
|
883
|
+
try:
|
|
884
|
+
sk = (env.synth_api_key or "").strip()
|
|
885
|
+
if int(code) == 401 or (isinstance(js, dict) and any(isinstance(v, str) and "Invalid API key" in v for v in js.values())):
|
|
886
|
+
base_url = env.dev_backend_url
|
|
887
|
+
print("Hint: HTTP 401 Unauthorized from backend. Verify SYNTH_API_KEY for:", base_url)
|
|
888
|
+
if sk:
|
|
889
|
+
print(f" SYNTH_API_KEY len={len(sk)} last5={sk[-5:]}")
|
|
890
|
+
print("Also ensure your Modal secret contains a valid ENVIRONMENT_API_KEY.")
|
|
891
|
+
except Exception:
|
|
892
|
+
pass
|
|
594
893
|
return 2
|
|
595
894
|
job_id = js.get("job_id") or js.get("id") or ""
|
|
596
895
|
if not job_id:
|
|
@@ -639,6 +938,96 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
639
938
|
return 0
|
|
640
939
|
|
|
641
940
|
|
|
941
|
+
def cmd_eval(args: argparse.Namespace) -> int:
|
|
942
|
+
env = demo_core.load_env()
|
|
943
|
+
# Ensure required env
|
|
944
|
+
if not env.task_app_base_url:
|
|
945
|
+
print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
|
|
946
|
+
return 1
|
|
947
|
+
# Load config: prefer CWD demo_config.toml; else packaged default
|
|
948
|
+
cfg_path: str | None = None
|
|
949
|
+
if getattr(args, "config", None):
|
|
950
|
+
p = os.path.abspath(args.config)
|
|
951
|
+
if not os.path.isfile(p):
|
|
952
|
+
print(f"Config not found: {p}")
|
|
953
|
+
return 1
|
|
954
|
+
cfg_path = p
|
|
955
|
+
else:
|
|
956
|
+
cwd_prepared = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
|
|
957
|
+
if os.path.isfile(cwd_prepared):
|
|
958
|
+
cfg_path = cwd_prepared
|
|
959
|
+
else:
|
|
960
|
+
packaged = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml"))
|
|
961
|
+
cfg_path = packaged if os.path.isfile(packaged) else None
|
|
962
|
+
if not cfg_path:
|
|
963
|
+
print("No config TOML found.")
|
|
964
|
+
return 1
|
|
965
|
+
|
|
966
|
+
import tomllib
|
|
967
|
+
with open(cfg_path, "rb") as fh:
|
|
968
|
+
inline_cfg = tomllib.load(fh)
|
|
969
|
+
|
|
970
|
+
# Model selection prompt
|
|
971
|
+
default_model = (args.model or (inline_cfg.get("model", {}) or {}).get("name") or "Qwen/Qwen3-0.6B")
|
|
972
|
+
entered = input(f"Model to evaluate [{default_model}]: ").strip()
|
|
973
|
+
model = entered or default_model
|
|
974
|
+
confirm = (input(f"Use model '{model}'? [Y/n]: ").strip().lower() or "y").startswith("y")
|
|
975
|
+
if not confirm:
|
|
976
|
+
print("Aborted by user.")
|
|
977
|
+
return 1
|
|
978
|
+
|
|
979
|
+
# Build on-board rollout request to the Task App (no backend RL job)
|
|
980
|
+
# Use Synth backend chat-completions proxy as inference URL (derive from DEV_BACKEND_URL)
|
|
981
|
+
# Ensure /api suffix for backend, then use proxy prefix for chat completions
|
|
982
|
+
backend_api = (env.dev_backend_url or "https://agent-learning.onrender.com/api").rstrip("/")
|
|
983
|
+
if not backend_api.endswith("/api"):
|
|
984
|
+
backend_api = f"{backend_api}/api"
|
|
985
|
+
inference_url = f"{backend_api}/proxy"
|
|
986
|
+
# ops: alternate agent/env for a small number of decisions (from config max_steps_per_episode if present)
|
|
987
|
+
try:
|
|
988
|
+
steps = int((inline_cfg.get("rollout", {}) or {}).get("max_steps_per_episode", 4))
|
|
989
|
+
except Exception:
|
|
990
|
+
steps = 4
|
|
991
|
+
ops: list[str] = []
|
|
992
|
+
for _ in range(max(1, steps // 2)):
|
|
993
|
+
ops.extend(["agent", "env"])
|
|
994
|
+
env_name = (inline_cfg.get("rollout", {}) or {}).get("env_name") or "math"
|
|
995
|
+
policy_name = (inline_cfg.get("rollout", {}) or {}).get("policy_name") or "math-react"
|
|
996
|
+
run_id = f"eval-{int(time.time())}"
|
|
997
|
+
body: Dict[str, Any] = {
|
|
998
|
+
"run_id": run_id,
|
|
999
|
+
"env": {
|
|
1000
|
+
"env_name": env_name,
|
|
1001
|
+
"config": inline_cfg.get("rollout", {}) or {},
|
|
1002
|
+
},
|
|
1003
|
+
"policy": {
|
|
1004
|
+
"policy_name": policy_name,
|
|
1005
|
+
"config": {"model": model, "inference_url": inference_url},
|
|
1006
|
+
},
|
|
1007
|
+
"ops": ops,
|
|
1008
|
+
"on_done": "terminate",
|
|
1009
|
+
}
|
|
1010
|
+
# POST to task app rollout endpoint
|
|
1011
|
+
headers = {"Content-Type": "application/json"}
|
|
1012
|
+
if env.env_api_key:
|
|
1013
|
+
headers["X-API-Key"] = env.env_api_key
|
|
1014
|
+
rc, resp = _http("POST", env.task_app_base_url.rstrip("/") + "/rollout", headers=headers, body=body)
|
|
1015
|
+
if rc not in (200, 201) or not isinstance(resp, dict):
|
|
1016
|
+
print("Eval rollout failed:", rc)
|
|
1017
|
+
try:
|
|
1018
|
+
print(json.dumps(resp, indent=2) if isinstance(resp, dict) else str(resp))
|
|
1019
|
+
except Exception:
|
|
1020
|
+
print(str(resp))
|
|
1021
|
+
print("Request body was:\n" + json.dumps(body, indent=2))
|
|
1022
|
+
return 2
|
|
1023
|
+
metrics = (resp.get("metrics") if isinstance(resp, dict) else None) or {}
|
|
1024
|
+
mean = metrics.get("mean_return")
|
|
1025
|
+
if mean is not None:
|
|
1026
|
+
print(f"eval.reward_mean={mean}")
|
|
1027
|
+
else:
|
|
1028
|
+
print(json.dumps(resp, indent=2))
|
|
1029
|
+
return 0
|
|
1030
|
+
|
|
642
1031
|
def main(argv: list[str] | None = None) -> int:
|
|
643
1032
|
p = argparse.ArgumentParser(prog="synth-ai")
|
|
644
1033
|
sub = p.add_subparsers(dest="cmd")
|
|
@@ -650,6 +1039,12 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
650
1039
|
|
|
651
1040
|
_add_parser(["rl_demo.check", "demo.check"], configure=lambda parser: parser.set_defaults(func=cmd_check))
|
|
652
1041
|
|
|
1042
|
+
def _init_opts(parser):
|
|
1043
|
+
parser.add_argument("--force", action="store_true", help="Overwrite existing files in CWD")
|
|
1044
|
+
parser.set_defaults(func=cmd_init)
|
|
1045
|
+
|
|
1046
|
+
_add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
|
|
1047
|
+
|
|
653
1048
|
# (prepare command removed)
|
|
654
1049
|
|
|
655
1050
|
def _deploy_opts(parser):
|
|
@@ -674,6 +1069,14 @@ def main(argv: list[str] | None = None) -> int:
|
|
|
674
1069
|
|
|
675
1070
|
_add_parser(["rl_demo.run", "demo.run"], configure=_run_opts)
|
|
676
1071
|
|
|
1072
|
+
def _eval_opts(parser):
|
|
1073
|
+
parser.add_argument("--config", type=str, default=None, help="Path to TOML config (optional)")
|
|
1074
|
+
parser.add_argument("--model", type=str, default=None, help="Model to evaluate (default Qwen/Qwen3-0.6B)")
|
|
1075
|
+
parser.add_argument("--timeout", type=int, default=300, help="Seconds to wait for metrics")
|
|
1076
|
+
parser.set_defaults(func=cmd_eval)
|
|
1077
|
+
|
|
1078
|
+
_add_parser(["rl_demo.eval", "demo.eval"], configure=_eval_opts)
|
|
1079
|
+
|
|
677
1080
|
args = p.parse_args(argv)
|
|
678
1081
|
if not hasattr(args, "func"):
|
|
679
1082
|
p.print_help()
|
|
synth_ai/demos/demo_task_apps/math/_common.py
ADDED
@@ -0,0 +1,17 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
"""Minimal helpers for the math task app.
|
|
4
|
+
|
|
5
|
+
This module provides a local fallback for install_problem_bank_into_shared so
|
|
6
|
+
the modal task app can import it without requiring an external math_rl package.
|
|
7
|
+
"""
|
|
8
|
+
|
|
9
|
+
def install_problem_bank_into_shared() -> None:
|
|
10
|
+
"""No-op placeholder for installing the Hendrycks MATH problem bank.
|
|
11
|
+
|
|
12
|
+
In production deployments, this can download or unpack the problem bank
|
|
13
|
+
into a shared directory. For the demo scaffold, it is a no-op.
|
|
14
|
+
"""
|
|
15
|
+
return None
|
|
16
|
+
|
|
17
|
+
|