synth-ai 0.2.6__py3-none-any.whl → 0.2.6.dev2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

@@ -6,11 +6,23 @@ import os
6
6
  import sys
7
7
  import time
8
8
  from typing import Any, Dict, Callable
9
+ import shutil
10
+ import stat
9
11
 
10
12
  from synth_ai.demos.demo_task_apps import core as demo_core
11
13
  from synth_ai.demos.demo_task_apps.core import DemoEnv
12
14
 
13
15
 
16
+ def _is_modal_public_url(u: str) -> bool:
17
+ try:
18
+ s = (u or "").strip().lower()
19
+ if not (s.startswith("http://") or s.startswith("https://")):
20
+ return False
21
+ return (".modal.run" in s) and ("modal.local" not in s) and ("pypi-mirror" not in s)
22
+ except Exception:
23
+ return False
24
+
25
+
14
26
  def cmd_check(_args: argparse.Namespace) -> int:
15
27
  env = demo_core.load_env()
16
28
  cwd_env_path = os.path.join(os.getcwd(), ".env")
@@ -21,6 +33,15 @@ def cmd_check(_args: argparse.Namespace) -> int:
21
33
  env = demo_core.load_env()
22
34
  local_env = demo_core.load_dotenv_file(cwd_env_path)
23
35
 
36
+ def _is_modal_public_url(u: str) -> bool:
37
+ try:
38
+ s = (u or "").strip().lower()
39
+ if not (s.startswith("http://") or s.startswith("https://")):
40
+ return False
41
+ return (".modal.run" in s) and ("modal.local" not in s) and ("pypi-mirror" not in s)
42
+ except Exception:
43
+ return False
44
+
24
45
  def _maybe_fix_task_url() -> None:
25
46
  if not env.task_app_name:
26
47
  return
@@ -28,9 +49,7 @@ def cmd_check(_args: argparse.Namespace) -> int:
28
49
  needs_lookup = False
29
50
  if not current:
30
51
  needs_lookup = True
31
- elif not current.endswith(".run") or current.endswith(".moda") or current.count(".") < 2:
32
- needs_lookup = True
33
- elif not current.startswith("http://") and not current.startswith("https://"):
52
+ elif not _is_modal_public_url(current):
34
53
  needs_lookup = True
35
54
  if not needs_lookup:
36
55
  return
@@ -48,7 +67,7 @@ def cmd_check(_args: argparse.Namespace) -> int:
48
67
  return
49
68
  new_url = ""
50
69
  for token in out.split():
51
- if token.startswith("http://") or token.startswith("https://"):
70
+ if _is_modal_public_url(token):
52
71
  new_url = token.strip().rstrip("/")
53
72
  break
54
73
  if new_url and new_url != current:
@@ -166,6 +185,45 @@ def _popen_stream(cmd: list[str], cwd: str | None = None, env: dict | None = Non
166
185
  return int(proc.returncode or 0)
167
186
 
168
187
 
188
+ def _popen_stream_capture(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> tuple[int, str]:
189
+ """Stream subprocess output to stdout and also capture it into a buffer."""
190
+ import subprocess
191
+ import threading
192
+
193
+ buf_lines: list[str] = []
194
+ try:
195
+ proc = subprocess.Popen(
196
+ cmd,
197
+ cwd=cwd,
198
+ env=env,
199
+ stdout=subprocess.PIPE,
200
+ stderr=subprocess.STDOUT,
201
+ text=True,
202
+ bufsize=1,
203
+ )
204
+ except Exception as exc:
205
+ print(f"Failed to launch {' '.join(cmd)}: {exc}")
206
+ return 1, ""
207
+
208
+ def _pump(stdout) -> None:
209
+ try:
210
+ for line in stdout:
211
+ line = line.rstrip()
212
+ print(line)
213
+ buf_lines.append(line)
214
+ except Exception:
215
+ pass
216
+
217
+ if proc.stdout is not None:
218
+ t = threading.Thread(target=_pump, args=(proc.stdout,), daemon=True)
219
+ t.start()
220
+ proc.wait()
221
+ t.join(timeout=1.0)
222
+ else:
223
+ proc.wait()
224
+ return int(proc.returncode or 0), "\n".join(buf_lines)
225
+
226
+
169
227
  def cmd_deploy(args: argparse.Namespace) -> int:
170
228
  env = demo_core.load_env()
171
229
  url = ""
@@ -187,9 +245,12 @@ def cmd_deploy(args: argparse.Namespace) -> int:
187
245
  # Auto-detect app path if not supplied; prompt for name and confirmation.
188
246
  app_path = os.path.abspath(args.app) if args.app else None
189
247
  if not app_path or not os.path.isfile(app_path):
248
+ # Prefer the synth_demo/ app seeded by `rl_demo init` over any root-level files
190
249
  candidates = [
250
+ os.path.abspath(os.path.join(os.getcwd(), "synth_demo", "task_app.py")),
251
+ os.path.abspath(os.path.join(os.getcwd(), "task_app.py")),
252
+ os.path.abspath(os.path.join(os.getcwd(), "app.py")),
191
253
  os.path.abspath(os.path.join(os.getcwd(), "math_task_app.py")),
192
- "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_task_app.py",
193
254
  ]
194
255
  app_path = next((p for p in candidates if os.path.isfile(p)), None)
195
256
  if not app_path and args.script:
@@ -200,12 +261,14 @@ def cmd_deploy(args: argparse.Namespace) -> int:
200
261
  app_name = args.name
201
262
  else:
202
263
  if not app_path:
203
- entered = input("Path to Modal app.py (e.g., tests/applications/math/rl/math_task_app.py): ").strip()
264
+ entered = input("Path to Modal app.py (e.g., ./task_app.py): ").strip()
204
265
  if not entered:
205
266
  raise FileNotFoundError("No app.py path provided and auto-detect failed")
206
267
  app_path = os.path.abspath(entered)
207
268
  if not os.path.isfile(app_path):
208
269
  raise FileNotFoundError(f"App file not found: {app_path}")
270
+ # Surface the app path before asking for the name
271
+ print(f"Using task app: {app_path}")
209
272
  suggested_name = args.name or f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
210
273
  name_in = input(f"Modal app name [{suggested_name}]: ").strip() or suggested_name
211
274
  app_name = name_in
@@ -217,22 +280,44 @@ def cmd_deploy(args: argparse.Namespace) -> int:
217
280
  print("Aborted by user.")
218
281
  return 1
219
282
  deploy_cmd = ["uv", "run", "python", "-m", "modal", "deploy", "--name", name_in, app_path]
220
- code, out = _popen_capture(deploy_cmd)
221
- print(out)
283
+ print("\nStreaming Modal build/deploy logs (this can take several minutes on first run)…\n")
284
+ code, deploy_logs = _popen_stream_capture(deploy_cmd)
222
285
  if code != 0:
223
286
  raise RuntimeError(f"modal deploy failed (exit {code})")
287
+ # Try to parse URL directly from streamed logs
288
+ if not url:
289
+ try:
290
+ import re as _re
291
+ m_all = _re.findall(r"https?://[^\s]+\.modal\.run", deploy_logs or "")
292
+ if m_all:
293
+ url = m_all[-1].strip().rstrip("/")
294
+ except Exception:
295
+ pass
224
296
  url_cmd = ["uv", "run", "python", "-m", "modal", "app", "url", name_in]
225
297
  code2, out2 = _popen_capture(url_cmd)
226
298
  if code2 == 0:
227
299
  for token in out2.split():
228
- if token.startswith("http://") or token.startswith("https://"):
300
+ if _is_modal_public_url(token):
229
301
  url = token.strip().rstrip("/")
230
302
  break
303
+ # Fallback: try reading recent Modal logs for the app to find a URL line
231
304
  if not url:
232
- for token in (out + "\n" + out2).split():
233
- if token.startswith("http://") or token.startswith("https://"):
234
- url = token.strip().rstrip("/")
235
- break
305
+ code3, out3 = _popen_capture(["uv", "run", "python", "-m", "modal", "app", "list"])
306
+ if code3 == 0 and out3:
307
+ for line in out3.splitlines():
308
+ if name_in in line:
309
+ for token in line.split():
310
+ if _is_modal_public_url(token):
311
+ url = token.strip().rstrip("/")
312
+ break
313
+ if url:
314
+ break
315
+ # Prompt user if still no valid URL
316
+ if not url:
317
+ print("\nCould not auto-detect a public Modal URL for the app.")
318
+ entered = input("Enter the Modal public URL (must contain '.modal.run'), or press Enter to abort: ").strip()
319
+ if entered and _is_modal_public_url(entered):
320
+ url = entered.rstrip("/")
236
321
  if not url:
237
322
  raise RuntimeError("Failed to resolve public URL from modal CLI output")
238
323
  if not url:
@@ -286,9 +371,34 @@ def cmd_configure(args: argparse.Namespace) -> int:
286
371
  demo_core.persist_dotenv_values({"ENVIRONMENT_API_KEY": env_key})
287
372
 
288
373
  task_url = env.task_app_base_url
289
- if not task_url:
290
- print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
291
- return 1
374
+ if not task_url or not _is_modal_public_url(task_url):
375
+ # If we have an app name, offer to resolve from Modal first
376
+ resolved = ""
377
+ if env.task_app_name:
378
+ try:
379
+ choice = input(f"Resolve URL from Modal for app '{env.task_app_name}'? [Y/n]: ").strip().lower() or "y"
380
+ if choice.startswith("y"):
381
+ code, out = _popen_capture([
382
+ "uv", "run", "python", "-m", "modal", "app", "url", env.task_app_name
383
+ ])
384
+ if code == 0 and out:
385
+ for tok in out.split():
386
+ if _is_modal_public_url(tok):
387
+ resolved = tok.strip().rstrip("/")
388
+ break
389
+ except Exception:
390
+ resolved = ""
391
+ if not resolved:
392
+ print("Task app URL not configured or not a valid Modal public URL.")
393
+ print("Examples: https://<app-name>-fastapi-app.modal.run")
394
+ entered = input("Enter Task App base URL (must contain '.modal.run'), or press Enter to abort: ").strip()
395
+ if not entered or not _is_modal_public_url(entered):
396
+ print("Valid Task App URL is required. Run: uvx synth-ai rl_demo deploy")
397
+ return 1
398
+ task_url = entered.rstrip("/")
399
+ else:
400
+ task_url = resolved
401
+ demo_core.persist_task_url(task_url, name=(env.task_app_name or None))
292
402
 
293
403
  app_name = env.task_app_name.strip()
294
404
  if not app_name:
@@ -317,20 +427,56 @@ def cmd_configure(args: argparse.Namespace) -> int:
317
427
  secret_args.append(f"SYNTH_API_KEY={synth_for_secret}")
318
428
 
319
429
  create_cmd = ["uv", "run", "modal", "secret", "create", secret_name, *secret_args]
320
- code, out = _popen_capture(create_cmd)
430
+ def _mask_args(args: list[str]) -> list[str]:
431
+ masked: list[str] = []
432
+ for a in args:
433
+ if "=" in a and any(a.startswith(k + "=") for k in ("ENVIRONMENT_API_KEY", "OPENAI_API_KEY", "SYNTH_API_KEY")):
434
+ try:
435
+ k, v = a.split("=", 1)
436
+ suf = v[-5:] if len(v) >= 5 else ""
437
+ masked.append(f"{k}=***{suf}")
438
+ except Exception:
439
+ masked.append("<masked>")
440
+ else:
441
+ masked.append(a)
442
+ return masked
443
+
444
+ print("\n[configure] Creating Modal secret (streaming logs)…")
445
+ print("[configure] Command:", " ".join(_mask_args(create_cmd)))
446
+ code = _popen_stream(create_cmd)
321
447
  if code != 0:
322
- print(out)
323
- print("Secret create failed; retrying with delete → create…")
324
- _popen_capture(["uv", "run", "modal", "secret", "delete", secret_name])
325
- code, out = _popen_capture(create_cmd)
448
+ print("[configure] Secret create failed; attempting delete → create")
449
+ delete_cmd = ["bash", "-lc", f"printf 'y\\n' | uv run modal secret delete {secret_name}"]
450
+ print("[configure] Command:", " ".join(delete_cmd))
451
+ _popen_stream(delete_cmd)
452
+ print("[configure] Retrying secret create…")
453
+ print("[configure] Command:", " ".join(_mask_args(create_cmd)))
454
+ code = _popen_stream(create_cmd)
326
455
  if code != 0:
327
- print(out)
328
- print("Failed to provision Modal secret.")
456
+ print("[configure] Failed to provision Modal secret.")
329
457
  return 2
330
458
 
331
459
  # Verify task app can read the secret by hitting rollout health with X-API-Key.
332
460
  rollout_url = task_url.rstrip("/") + "/health/rollout"
333
- rc, body = _http("GET", rollout_url, headers={"X-API-Key": env_key})
461
+ print("[configure] Verifying rollout health:")
462
+ # Prefer rollout-specific health first (auth-aware), then plain /health
463
+ health_base = task_url.rstrip("/")
464
+ health_urls = [f"{health_base}/health/rollout", f"{health_base}/health"]
465
+ rc = 0
466
+ body = ""
467
+ for h in health_urls:
468
+ print("[configure] GET", h)
469
+ rc, body = _http("GET", h, headers={"X-API-Key": env_key})
470
+ if rc == 200:
471
+ rollout_url = h
472
+ break
473
+ print("[configure] status:", rc)
474
+ try:
475
+ import json as _json
476
+ preview = _json.dumps(body)[:800] if isinstance(body, dict) else str(body)[:800]
477
+ except Exception:
478
+ preview = str(body)[:800]
479
+ print("[configure] body:", preview)
334
480
  if rc != 200:
335
481
  print(f"Warning: rollout health check failed ({rc}). Response: {body}")
336
482
  else:
@@ -341,22 +487,26 @@ def cmd_configure(args: argparse.Namespace) -> int:
341
487
  env.task_app_name = app_name
342
488
  env.task_app_secret_name = secret_name
343
489
 
344
- # Prepare a baseline TOML (formerly `prepare`): prompt and write demo_config.toml
345
- defaults = [
346
- os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
347
- ]
348
- mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
349
- if os.path.isfile(mono):
350
- defaults.append(mono)
351
- print("Select a baseline TOML:")
352
- for i, p in enumerate(defaults, 1):
353
- print(f" [{i}] {p}")
354
- choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
355
- try:
356
- idx = max(1, min(int(choice), len(defaults))) - 1
357
- except Exception:
358
- idx = 0
359
- base_path = defaults[idx]
490
+ # Prefer the seeded CWD config if present; otherwise fall back to packaged default
491
+ seeded_cfg = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
492
+ if os.path.isfile(seeded_cfg):
493
+ base_path = seeded_cfg
494
+ else:
495
+ defaults = [
496
+ os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
497
+ ]
498
+ mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
499
+ if os.path.isfile(mono):
500
+ defaults.append(mono)
501
+ print("Select a baseline TOML:")
502
+ for i, p in enumerate(defaults, 1):
503
+ print(f" [{i}] {p}")
504
+ choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
505
+ try:
506
+ idx = max(1, min(int(choice), len(defaults))) - 1
507
+ except Exception:
508
+ idx = 0
509
+ base_path = defaults[idx]
360
510
  with open(base_path, "r") as fh:
361
511
  text = fh.read()
362
512
  import re
@@ -413,6 +563,131 @@ def cmd_configure(args: argparse.Namespace) -> int:
413
563
  return 0
414
564
 
415
565
 
566
def cmd_init(args: argparse.Namespace) -> int:
    """Scaffold a Modal-ready Math Task App under ``./synth_demo``.

    Steps, in order:

    1. Check that the ``modal`` package can be located; if not, try to
       install it (``uv pip`` when ``uv`` is on PATH, otherwise ``pip``).
    2. Copy the packaged ``demo_task_apps/math/modal_task_app.py`` to
       ``synth_demo/task_app.py``. An existing file is only overwritten
       when ``args.force`` is set.
    3. Write an executable ``synth_demo/deploy_task_app.sh`` helper script.
    4. Create ``synth_demo/.env`` with placeholder keys if it is missing.
    5. Seed ``synth_demo/demo_config.toml`` from the packaged default when
       it is missing (or unconditionally with ``args.force``).

    Returns:
        0 on success; 1 when the packaged task app is missing or an existing
        task app would be overwritten without ``--force``; 2 on any
        unexpected error.
    """
    import importlib
    import importlib.util

    def _has_modal() -> bool:
        """Report whether the ``modal`` package can be located."""
        try:
            return importlib.util.find_spec("modal") is not None
        except (ImportError, ValueError):
            return False

    try:
        # Ensure `modal` is installed for deployment flows
        if _has_modal():
            print("modal found")
        else:
            print("modal not found; installing…")
            # Prefer uv if available; otherwise fallback to pip
            try:
                if shutil.which("uv"):
                    install_cmd = ["uv", "pip", "install", "modal>=1.1.4"]
                else:
                    install_cmd = [sys.executable, "-m", "pip", "install", "modal>=1.1.4"]
                code, out = _popen_capture(install_cmd)
                if code != 0:
                    print(out)
                    print("Failed to install modal; continuing may fail.")
                else:
                    print("modal installed successfully.")
            except Exception as e:
                print(f"modal install error: {e}")
            # The import system caches directory listings; drop them so a
            # package installed a moment ago can actually be found.
            importlib.invalidate_caches()
            if not _has_modal():
                print("Warning: modal is still not importable after install attempt.")

        force = bool(getattr(args, "force", False))
        packaged_math_dir = os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math")

        demo_dir = os.path.join(os.getcwd(), "synth_demo")
        os.makedirs(demo_dir, exist_ok=True)
        # Paths inside synth_demo/
        dst_task_py = os.path.join(demo_dir, "task_app.py")
        dst_deploy = os.path.join(demo_dir, "deploy_task_app.sh")
        env_path = os.path.join(demo_dir, ".env")
        dst_cfg = os.path.join(demo_dir, "demo_config.toml")

        # Copy packaged math modal task app into synth_demo/task_app.py
        src_modal = os.path.abspath(os.path.join(packaged_math_dir, "modal_task_app.py"))
        if not os.path.isfile(src_modal):
            print("Init failed: packaged math modal task app not found.")
            print(f"Looked for: {src_modal}")
            return 1
        if os.path.exists(dst_task_py) and not force:
            print(f"Refusing to overwrite existing file: {dst_task_py} (use --force)")
            return 1
        shutil.copy2(src_modal, dst_task_py)

        # Create deploy script in synth_demo/. A raw string keeps the regex
        # backslashes (\.) literal without relying on invalid escapes.
        deploy_text = r"""#!/usr/bin/env bash
set -euo pipefail

HERE=$(cd "$(dirname "$0")" && pwd)
APP="$HERE/task_app.py"
if [ -f "$HERE/.env" ]; then
  # shellcheck disable=SC2046
  export $(grep -v '^#' "$HERE/.env" | xargs -I{} echo {})
fi
uv run modal deploy "$APP" | tee "$HERE/.last_deploy.log"
URL=$(grep -Eo 'https://[^ ]+\.modal\.run' "$HERE/.last_deploy.log" | tail -1 || true)
if [ -n "$URL" ]; then
  if grep -q '^TASK_APP_BASE_URL=' "$HERE/.env" 2>/dev/null; then
    sed -i.bak "s#^TASK_APP_BASE_URL=.*#TASK_APP_BASE_URL=$URL#" "$HERE/.env" || true
  else
    echo "TASK_APP_BASE_URL=$URL" >> "$HERE/.env"
  fi
  echo "Saved TASK_APP_BASE_URL to $HERE/.env"
fi
"""
        _write_text(dst_deploy, deploy_text)
        # Best effort: failing to set the executable bits must not abort init.
        try:
            mode = os.stat(dst_deploy).st_mode
            os.chmod(dst_deploy, mode | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
        except OSError as exc:
            print(f"Warning: could not mark {dst_deploy} executable: {exc}")

        # Seed .env if not present
        if not os.path.exists(env_path):
            env_lines = [
                "# Required for task app auth to environment service",
                "ENVIRONMENT_API_KEY=",
                "",
                "# Optional: for CLI job submission and proxying OpenAI models",
                "SYNTH_API_KEY=",
                "OPENAI_API_KEY=",
                "",
                "# Optional: set to 'prod' to use production names",
                "ENVIRONMENT=",
            ]
            _write_text(env_path, "\n".join(env_lines) + "\n")

        # Seed demo_config.toml from packaged default if not present (or overwrite with --force)
        packaged_cfg = os.path.abspath(os.path.join(packaged_math_dir, "config.toml"))
        if os.path.isfile(packaged_cfg) and (force or not os.path.exists(dst_cfg)):
            # Best effort: a missing seed config is not fatal for init.
            try:
                shutil.copy2(packaged_cfg, dst_cfg)
            except OSError as exc:
                print(f"Warning: could not seed {dst_cfg}: {exc}")

        print("Initialized Math Task App in synth_demo/:")
        print(f" - {dst_task_py}")
        print(f" - {dst_deploy}")
        print(f" - {env_path} (created if missing)")
        if os.path.exists(dst_cfg):
            print(f" - {dst_cfg} (seeded)")
        print("")
        print("Next steps:")
        print(" 1) cd synth_demo && put your ENVIRONMENT_API_KEY in ./.env")
        print(" 2) Deploy to Modal:")
        print(" uvx bash ./deploy_task_app.sh")
        print(" 3) From project root, run: uvx synth-ai rl_demo configure; uvx synth-ai rl_demo run")
        return 0
    except Exception as e:
        # Top-level boundary for the command: report and signal failure.
        print(f"Init error: {e}")
        return 2
689
+
690
+
416
691
  def _http(method: str, url: str, headers: Dict[str, str] | None = None, body: Dict[str, Any] | None = None) -> tuple[int, Dict[str, Any] | str]:
417
692
  import urllib.request, urllib.error, json as _json
418
693
  data = None
@@ -493,6 +768,19 @@ def cmd_run(args: argparse.Namespace) -> int:
493
768
  code = _popen_stream(cmd, env=run_env)
494
769
  if code != 0:
495
770
  print(f"Clustered runner exited with code {code}")
771
+ # Actionable guidance for common auth issues
772
+ try:
773
+ base_url = backend_base.rstrip("/") + "/api"
774
+ except Exception:
775
+ base_url = backend_base
776
+ sk = (env.synth_api_key or "").strip()
777
+ ek = (env.env_api_key or "").strip()
778
+ print("Hint: If backend responded 401, verify SYNTH_API_KEY for:", base_url)
779
+ if sk:
780
+ print(f" SYNTH_API_KEY len={len(sk)} last5={sk[-5:]}")
781
+ if ek:
782
+ print(f" ENVIRONMENT_API_KEY len={len(ek)} last5={ek[-5:]}")
783
+ print("Also ensure your Modal secret contains ENVIRONMENT_API_KEY and matches the task app.")
496
784
  return code
497
785
 
498
786
  # Fallback: legacy jobs API flow
@@ -591,6 +879,17 @@ def cmd_run(args: argparse.Namespace) -> int:
591
879
  except Exception:
592
880
  print(str(js))
593
881
  print("Request body was:\n" + json.dumps(body, indent=2))
882
+ # Extra hints for auth failures
883
+ try:
884
+ sk = (env.synth_api_key or "").strip()
885
+ if int(code) == 401 or (isinstance(js, dict) and any(isinstance(v, str) and "Invalid API key" in v for v in js.values())):
886
+ base_url = env.dev_backend_url
887
+ print("Hint: HTTP 401 Unauthorized from backend. Verify SYNTH_API_KEY for:", base_url)
888
+ if sk:
889
+ print(f" SYNTH_API_KEY len={len(sk)} last5={sk[-5:]}")
890
+ print("Also ensure your Modal secret contains a valid ENVIRONMENT_API_KEY.")
891
+ except Exception:
892
+ pass
594
893
  return 2
595
894
  job_id = js.get("job_id") or js.get("id") or ""
596
895
  if not job_id:
@@ -639,6 +938,96 @@ def cmd_run(args: argparse.Namespace) -> int:
639
938
  return 0
640
939
 
641
940
 
941
def cmd_eval(args: argparse.Namespace) -> int:
    """Run a single evaluation rollout against the deployed task app.

    The config TOML is taken from ``args.config`` when given, otherwise from
    ``./demo_config.toml``, otherwise from the packaged math default. The
    user is prompted for (and asked to confirm) the model, then a rollout
    request is POSTed to ``<task_app_base_url>/rollout``.

    Prints ``eval.reward_mean=<value>`` when the response carries
    ``metrics.mean_return``; otherwise prints the whole response as JSON.

    Returns:
        0 on success; 1 when the task app URL or a usable config is missing,
        or the user aborts; 2 when the rollout request fails.
    """
    env = demo_core.load_env()
    # Ensure required env
    if not env.task_app_base_url:
        print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
        return 1

    # Load config: prefer CWD demo_config.toml; else packaged default
    cfg_path: str | None = None
    if getattr(args, "config", None):
        explicit = os.path.abspath(args.config)
        if not os.path.isfile(explicit):
            print(f"Config not found: {explicit}")
            return 1
        cfg_path = explicit
    else:
        cwd_prepared = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
        packaged = os.path.abspath(
            os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")
        )
        if os.path.isfile(cwd_prepared):
            cfg_path = cwd_prepared
        elif os.path.isfile(packaged):
            cfg_path = packaged
    if not cfg_path:
        print("No config TOML found.")
        return 1

    import tomllib

    try:
        with open(cfg_path, "rb") as fh:
            inline_cfg = tomllib.load(fh)
    except (OSError, tomllib.TOMLDecodeError) as exc:
        # Report an unreadable or malformed config instead of a traceback.
        print(f"Failed to load config {cfg_path}: {exc}")
        return 1

    # Tolerate configs where these keys are absent or are not tables.
    model_cfg = inline_cfg.get("model") or {}
    if not isinstance(model_cfg, dict):
        model_cfg = {}
    rollout_cfg = inline_cfg.get("rollout") or {}
    if not isinstance(rollout_cfg, dict):
        rollout_cfg = {}

    # Model selection prompt
    default_model = getattr(args, "model", None) or model_cfg.get("name") or "Qwen/Qwen3-0.6B"
    entered = input(f"Model to evaluate [{default_model}]: ").strip()
    model = entered or default_model
    answer = input(f"Use model '{model}'? [Y/n]: ").strip().lower() or "y"
    if not answer.startswith("y"):
        print("Aborted by user.")
        return 1

    # Build on-board rollout request to the Task App (no backend RL job).
    # Use the Synth backend proxy as inference URL (derived from
    # DEV_BACKEND_URL), making sure the backend base ends with /api first.
    backend_api = (env.dev_backend_url or "https://agent-learning.onrender.com/api").rstrip("/")
    if not backend_api.endswith("/api"):
        backend_api = f"{backend_api}/api"
    inference_url = f"{backend_api}/proxy"

    # ops: alternate agent/env for a small number of decisions
    # (from config max_steps_per_episode if present)
    try:
        steps = int(rollout_cfg.get("max_steps_per_episode", 4))
    except (TypeError, ValueError):
        steps = 4
    ops: list[str] = ["agent", "env"] * max(1, steps // 2)

    body: Dict[str, Any] = {
        "run_id": f"eval-{int(time.time())}",
        "env": {
            "env_name": rollout_cfg.get("env_name") or "math",
            "config": rollout_cfg,
        },
        "policy": {
            "policy_name": rollout_cfg.get("policy_name") or "math-react",
            "config": {"model": model, "inference_url": inference_url},
        },
        "ops": ops,
        "on_done": "terminate",
    }

    # POST to task app rollout endpoint
    headers = {"Content-Type": "application/json"}
    if env.env_api_key:
        headers["X-API-Key"] = env.env_api_key
    rc, resp = _http("POST", env.task_app_base_url.rstrip("/") + "/rollout", headers=headers, body=body)
    if rc not in (200, 201) or not isinstance(resp, dict):
        print("Eval rollout failed:", rc)
        try:
            print(json.dumps(resp, indent=2) if isinstance(resp, dict) else str(resp))
        except Exception:
            print(str(resp))
        print("Request body was:\n" + json.dumps(body, indent=2))
        return 2

    # resp is known to be a dict from here on.
    metrics = resp.get("metrics") or {}
    mean = metrics.get("mean_return")
    if mean is not None:
        print(f"eval.reward_mean={mean}")
    else:
        print(json.dumps(resp, indent=2))
    return 0
1030
+
642
1031
  def main(argv: list[str] | None = None) -> int:
643
1032
  p = argparse.ArgumentParser(prog="synth-ai")
644
1033
  sub = p.add_subparsers(dest="cmd")
@@ -650,6 +1039,12 @@ def main(argv: list[str] | None = None) -> int:
650
1039
 
651
1040
  _add_parser(["rl_demo.check", "demo.check"], configure=lambda parser: parser.set_defaults(func=cmd_check))
652
1041
 
1042
+ def _init_opts(parser):
1043
+ parser.add_argument("--force", action="store_true", help="Overwrite existing files in CWD")
1044
+ parser.set_defaults(func=cmd_init)
1045
+
1046
+ _add_parser(["rl_demo.init", "demo.init"], configure=_init_opts)
1047
+
653
1048
  # (prepare command removed)
654
1049
 
655
1050
  def _deploy_opts(parser):
@@ -674,6 +1069,14 @@ def main(argv: list[str] | None = None) -> int:
674
1069
 
675
1070
  _add_parser(["rl_demo.run", "demo.run"], configure=_run_opts)
676
1071
 
1072
+ def _eval_opts(parser):
1073
+ parser.add_argument("--config", type=str, default=None, help="Path to TOML config (optional)")
1074
+ parser.add_argument("--model", type=str, default=None, help="Model to evaluate (default Qwen/Qwen3-0.6B)")
1075
+ parser.add_argument("--timeout", type=int, default=300, help="Seconds to wait for metrics")
1076
+ parser.set_defaults(func=cmd_eval)
1077
+
1078
+ _add_parser(["rl_demo.eval", "demo.eval"], configure=_eval_opts)
1079
+
677
1080
  args = p.parse_args(argv)
678
1081
  if not hasattr(args, "func"):
679
1082
  p.print_help()
@@ -0,0 +1,17 @@
1
"""Minimal helpers for the math task app.

This module provides a local fallback for install_problem_bank_into_shared so
the modal task app can import it without requiring an external math_rl package.
"""

from __future__ import annotations
8
+
9
def install_problem_bank_into_shared() -> None:
    """Stand-in for installing the Hendrycks MATH problem bank; does nothing.

    A production deployment could fetch or unpack the problem bank into a
    shared directory here. The demo scaffold has nothing to install, so the
    call simply returns.
    """
    return
16
+
17
+