synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (112) hide show
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/demo.py +68 -9
  4. synth_ai/cli/rl_demo.py +137 -0
  5. synth_ai/cli/root.py +65 -0
  6. synth_ai/demos/core/__init__.py +1 -0
  7. synth_ai/demos/core/cli.py +685 -0
  8. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  9. synth_ai/demos/demo_task_apps/core.py +374 -0
  10. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  12. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  13. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  14. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  15. synth_ai/environments/examples/bandit/__init__.py +33 -0
  16. synth_ai/environments/examples/bandit/engine.py +294 -0
  17. synth_ai/environments/examples/bandit/environment.py +194 -0
  18. synth_ai/environments/examples/bandit/taskset.py +200 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  83. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  84. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  85. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  86. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  87. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  88. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  89. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  90. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  91. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  92. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  93. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  94. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  97. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  98. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  100. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  101. synth_ai/environments/examples/red/units/__init__.py +1 -0
  102. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  103. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  104. synth_ai/environments/service/app.py +8 -0
  105. synth_ai/install_sqld.sh +40 -0
  106. synth_ai-0.2.5.dist-info/METADATA +106 -0
  107. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/RECORD +111 -12
  108. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/entry_points.txt +1 -0
  109. synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
  110. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/WHEEL +0 -0
  111. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/licenses/LICENSE +0 -0
  112. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,685 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+ import sys
7
+ import time
8
+ from typing import Any, Dict, Callable
9
+
10
+ from synth_ai.demos.demo_task_apps import core as demo_core
11
+ from synth_ai.demos.demo_task_apps.core import DemoEnv
12
+
13
+
14
def cmd_check(_args: argparse.Namespace) -> int:
    """Check the demo prerequisites and print a status line for each.

    Steps, in order:
      1. Ensure a SYNTH_API_KEY is available, prompting for and persisting one
         when the loaded environment has none.
      2. Report Modal authentication status.
      3. Re-resolve the task app URL through the Modal CLI when the stored
         value is missing or looks malformed.
      4. Probe the backend and task app health endpoints.
      5. Report whether ``uv`` can be executed.

    Args:
        _args: Parsed CLI arguments (unused).

    Returns:
        0 when the backend health check passed, Modal auth is OK and a
        SYNTH_API_KEY is loaded; 1 otherwise. Task app reachability is
        reported but does not influence the exit status.
    """
    env = demo_core.load_env()
    cwd_env_path = os.path.join(os.getcwd(), ".env")
    local_env = demo_core.load_dotenv_file(cwd_env_path)

    def _refresh_env() -> None:
        # Reload both views after something was persisted so later checks
        # see the new values.
        nonlocal env, local_env
        env = demo_core.load_env()
        local_env = demo_core.load_dotenv_file(cwd_env_path)

    def _maybe_fix_task_url() -> None:
        # Only a named Modal app can be looked up through the CLI.
        if not env.task_app_name:
            return
        current = env.task_app_base_url
        needs_lookup = False
        if not current:
            needs_lookup = True
        elif not current.endswith(".run") or current.endswith(".moda") or current.count(".") < 2:
            # NOTE(review): the ".moda" test is already implied by the
            # "does not end with .run" test; kept as written.
            needs_lookup = True
        elif not current.startswith("http://") and not current.startswith("https://"):
            needs_lookup = True
        if not needs_lookup:
            return
        code, out = _popen_capture([
            "uv",
            "run",
            "python",
            "-m",
            "modal",
            "app",
            "url",
            env.task_app_name,
        ])
        if code != 0 or not out:
            return
        # Take the first whitespace-separated token that looks like a URL.
        new_url = ""
        for token in out.split():
            if token.startswith("http://") or token.startswith("https://"):
                new_url = token.strip().rstrip("/")
                break
        if new_url and new_url != current:
            print(f"Updating TASK_APP_BASE_URL from Modal CLI → {new_url}")
            demo_core.persist_task_url(new_url, name=env.task_app_name)
            dotenv_values = {
                "TASK_APP_BASE_URL": new_url,
                "TASK_APP_NAME": env.task_app_name,
                "TASK_APP_SECRET_NAME": env.task_app_secret_name or f"{env.task_app_name}-secret",
            }
            demo_core.persist_dotenv_values(dotenv_values)
            os.environ["TASK_APP_BASE_URL"] = new_url
            _refresh_env()

    synth_key = env.synth_api_key.strip()
    if not synth_key:
        print("SYNTH_API_KEY missing from environment/.env.")
        entered = input("Enter SYNTH_API_KEY (required): ").strip()
        if not entered:
            print("SYNTH_API_KEY is required.")
            return 1
        os.environ["SYNTH_API_KEY"] = entered
        demo_core.persist_api_key(entered)
        path = demo_core.persist_dotenv_values({"SYNTH_API_KEY": entered})
        print(f"Stored SYNTH_API_KEY in {path}")
        _refresh_env()
        synth_key = entered
    elif not local_env.get("SYNTH_API_KEY"):
        # Key is known but absent from the .env in the working directory.
        path = demo_core.persist_dotenv_values({"SYNTH_API_KEY": synth_key})
        print(f"Stored SYNTH_API_KEY in {path}")
        _refresh_env()

    modal_ok, modal_msg = demo_core.modal_auth_status()
    print(f"Modal auth: {'OK' if modal_ok else 'MISSING'} ({modal_msg})")

    _maybe_fix_task_url()

    ok_backend = False
    ok_task = False
    if env.dev_backend_url:
        api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
        ok_backend = demo_core.assert_http_ok(api + "/health", method="GET")
        print(f"Backend health: {'OK' if ok_backend else 'FAIL'} ({api}/health)")
    else:
        print("Backend URL missing; set DEV_BACKEND_URL.")
    if env.task_app_base_url:
        # Try /health first, then fall back to the bare base URL.
        ok_task = demo_core.assert_http_ok(env.task_app_base_url.rstrip("/") + "/health", method="GET") or \
            demo_core.assert_http_ok(env.task_app_base_url.rstrip("/"), method="GET")
        print(f"Task app: {'OK' if ok_task else 'UNREACHABLE'} ({env.task_app_base_url})")
    else:
        print("Task app URL not set; run: uvx synth-ai rl_demo deploy")

    # Flush the label before the child runs: `uv --version` writes directly
    # to the inherited stdout, so an unflushed "uv: " would otherwise show up
    # after the version line instead of in front of it.
    print("uv: ", end="", flush=True)
    try:
        import subprocess

        subprocess.check_call(["uv", "--version"])
    except Exception:
        print("(uv not found; install with `pip install uv`)\n", flush=True)

    status = 0
    if not ok_backend:
        status = 1
    if not modal_ok:
        status = 1
    if not env.synth_api_key:
        status = 1
    return status
120
+
121
+
122
def _popen_capture(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> tuple[int, str]:
    """Run *cmd* to completion and return ``(exit_code, output)``.

    stderr is merged into stdout and the combined text is returned. Any
    exception raised while launching or waiting for the process is reported
    as ``(1, str(exception))`` instead of propagating.
    """
    import subprocess

    try:
        child = subprocess.Popen(
            cmd,
            cwd=cwd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
        )
        captured = child.communicate()[0]
        exit_code = child.returncode if child.returncode else 0
        return int(exit_code), captured if captured else ""
    except Exception as err:
        return 1, str(err)
130
+
131
+
132
def _popen_stream(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> int:
    """Run *cmd*, echoing its merged stdout/stderr line by line as it arrives.

    Returns the process exit code, or 1 (after printing the reason) when the
    process could not be started.
    """

    import subprocess
    import threading

    try:
        child = subprocess.Popen(
            cmd,
            cwd=cwd,
            env=env,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            text=True,
            bufsize=1,
        )
    except Exception as exc:
        print(f"Failed to launch {' '.join(cmd)}: {exc}")
        return 1

    def _pump(stdout) -> None:
        # Best-effort echo; read errors are ignored.
        try:
            for line in stdout:
                print(line.rstrip())
        except Exception:
            pass

    pipe = child.stdout
    if pipe is None:
        child.wait()
        return int(child.returncode or 0)

    # Echo from a daemon thread; after the child exits, wait at most one
    # second for the reader to finish.
    reader = threading.Thread(target=_pump, args=(pipe,), daemon=True)
    reader.start()
    child.wait()
    reader.join(timeout=1.0)
    return int(child.returncode or 0)
167
+
168
+
169
def cmd_deploy(args: argparse.Namespace) -> int:
    """Deploy the demo task app (locally or to Modal) and persist its URL.

    With ``args.local`` a local task app process is started and polled for up
    to 30 seconds. Otherwise a Modal app file is located (``args.app``, then
    known candidate paths, then an interactive prompt) and deployed through
    the Modal CLI; ``args.script`` selects the legacy script deploy when no
    app file was found.

    Returns:
        0 on success, 1 when the user aborts at the confirmation prompt, and
        2 when the URL could not be determined or any step raised.
    """

    def _first_url(blob: str) -> str:
        # First whitespace-separated http(s) token, without a trailing "/".
        for word in blob.split():
            if word.startswith("http://") or word.startswith("https://"):
                return word.strip().rstrip("/")
        return ""

    env = demo_core.load_env()
    url = ""
    app_name = env.task_app_name or ""
    try:
        if args.local:
            print("Starting local Task App…")
            import subprocess
            subprocess.Popen(
                [sys.executable, "-c", "from synth_ai.demos.demo_task_apps.math.app import run; run()"],
                stdout=sys.stdout,
                stderr=sys.stderr,
            )
            local_base = "http://127.0.0.1:8080"
            app_name = ""
            # Poll once per second until either health probe succeeds.
            for _attempt in range(30):
                healthy = demo_core.assert_http_ok(local_base + "/health", method="GET") or \
                    demo_core.assert_http_ok(local_base, method="GET")
                if healthy:
                    url = local_base
                    break
                time.sleep(1)
        else:
            # Resolve the Modal app file: explicit --app first, then known locations.
            app_path = os.path.abspath(args.app) if args.app else None
            if not (app_path and os.path.isfile(app_path)):
                search_paths = (
                    os.path.abspath(os.path.join(os.getcwd(), "math_task_app.py")),
                    "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_task_app.py",
                )
                app_path = next(filter(os.path.isfile, search_paths), None)
            if not app_path and args.script:
                # Legacy path: the caller explicitly supplied --script.
                from synth_ai.demos.demo_task_apps.math.deploy_modal import deploy as modal_deploy
                url = modal_deploy(script_path=args.script, env_api_key=env.env_api_key)
                if args.name:
                    app_name = args.name
            else:
                if not app_path:
                    typed = input("Path to Modal app.py (e.g., tests/applications/math/rl/math_task_app.py): ").strip()
                    if not typed:
                        raise FileNotFoundError("No app.py path provided and auto-detect failed")
                    app_path = os.path.abspath(typed)
                if not os.path.isfile(app_path):
                    raise FileNotFoundError(f"App file not found: {app_path}")
                stem = os.path.splitext(os.path.basename(app_path))[0]
                suggested_name = args.name or f"synth-{stem}"
                name_in = input(f"Modal app name [{suggested_name}]: ").strip() or suggested_name
                app_name = name_in
                print("\nAbout to deploy with:")
                print(f" app.py: {app_path}")
                print(f" name: {name_in}")
                answer = input("Proceed? [Y/n]: ").strip().lower() or "y"
                if not answer.startswith("y"):
                    print("Aborted by user.")
                    return 1
                deploy_code, deploy_out = _popen_capture(
                    ["uv", "run", "python", "-m", "modal", "deploy", "--name", name_in, app_path]
                )
                print(deploy_out)
                if deploy_code != 0:
                    raise RuntimeError(f"modal deploy failed (exit {deploy_code})")
                url_code, url_out = _popen_capture(
                    ["uv", "run", "python", "-m", "modal", "app", "url", name_in]
                )
                if url_code == 0:
                    url = _first_url(url_out)
                if not url:
                    # Fall back to scanning the deploy output as well.
                    url = _first_url(deploy_out + "\n" + url_out)
                if not url:
                    raise RuntimeError("Failed to resolve public URL from modal CLI output")
        if not url:
            print("Failed to determine Task App URL")
            return 2

        demo_core.persist_task_url(url, name=app_name or None)
        dotenv_values = {"TASK_APP_BASE_URL": url}
        if app_name:
            dotenv_values["TASK_APP_NAME"] = app_name
            dotenv_values["TASK_APP_SECRET_NAME"] = f"{app_name}-secret"
        dotenv_path = demo_core.persist_dotenv_values(dotenv_values)

        print(f"TASK_APP_BASE_URL={url}")
        if app_name:
            print(f"TASK_APP_NAME={app_name}")
        print("Export for this shell:")
        print(f" export TASK_APP_BASE_URL={url}")
        if app_name:
            print(f" export TASK_APP_NAME={app_name}")
            print(f" export TASK_APP_SECRET_NAME={app_name}-secret")
        print(f"Persisted to {dotenv_path}")
        print("Next: uvx synth-ai rl_demo configure")
        return 0
    except Exception as e:
        print(f"Deploy error: {e}")
        return 2
261
+
262
+
263
def _configure_toml_defaults(text: str) -> tuple[str, str, str, str]:
    """Read prompt defaults from baseline TOML text.

    Returns ``(gpu_type, gpu_count, model, tensor_parallel)`` as strings.
    A topology-style line ``gpu_type = "TYPE:COUNT"`` takes precedence and is
    split into its type and count parts.
    """
    import re

    def _first(pattern: str, default: str) -> str:
        found = re.search(pattern, text, flags=re.M)
        if not found:
            return default
        value = (found.group(1) or "").strip()
        return value if value else default

    gpu_type = _first(r'^gpu_type\s*=\s*"([^"]+)"$', "A100")
    gpu_count = _first(r"^gpu_count\s*=\s*(\d+)$", "4")
    # Read both groups of the topology form directly; going through a
    # group(1)-only extractor would drop the ":COUNT" part.
    topo = re.search(r'^gpu_type\s*=\s*"([^":]+):(\d+)"$', text, flags=re.M)
    if topo:
        gpu_type = topo.group(1).strip() or gpu_type
        gpu_count = topo.group(2)
    model = _first(r'^name\s*=\s*"([^"]+)"$', "Qwen/Qwen3-0.6B")
    tensor_parallel = _first(r"^tensor_parallel_size\s*=\s*(\d+)$", "2")
    return gpu_type, gpu_count, model, tensor_parallel


def _configure_apply_overrides(text: str, gpu_type: str, gpu_count: int, model: str, tp: int) -> str:
    """Return *text* with the GPU, model and tensor-parallel settings replaced.

    Every matching line is rewritten. ``gpu_type`` lines keep their original
    shape: a ``"TYPE:COUNT"`` line stays in topology form, a plain line stays
    plain. Callable replacements are used so user input is inserted literally
    and never interpreted as a regex replacement template.
    """
    import re

    def _gpu_type_line(found: "re.Match[str]") -> str:
        if re.fullmatch(r'[^":]+:\d+', found.group(1)):
            return f'gpu_type = "{gpu_type}:{gpu_count}"'
        return f'gpu_type = "{gpu_type}"'

    text = re.sub(r'(?m)^gpu_type\s*=\s*"(.*?)"$', _gpu_type_line, text)
    text = re.sub(r"(?m)^gpu_count\s*=\s*\d+$", lambda _m: f"gpu_count = {gpu_count}", text)
    # NOTE(review): this rewrites every top-of-line `name = "..."` entry,
    # whichever TOML table it belongs to — same as before; confirm intended.
    text = re.sub(r'(?m)^name\s*=\s*".*?"$', lambda _m: f'name = "{model}"', text)
    text = re.sub(
        r"(?m)^tensor_parallel_size\s*=\s*\d+$",
        lambda _m: f"tensor_parallel_size = {tp}",
        text,
    )
    return text


def cmd_configure(args: argparse.Namespace) -> int:
    """Provision keys and the Modal secret, then write ``demo_config.toml``.

    Steps: ensure SYNTH_API_KEY (prompting if missing), ensure an
    ENVIRONMENT_API_KEY (minting one if missing), resolve the task app
    URL/name/secret name, create the Modal secret (retrying once after a
    delete), probe the task app's rollout health endpoint, and finally prompt
    for GPU/model settings and write a config derived from a baseline TOML
    into the current working directory.

    Args:
        args: Parsed CLI arguments (unused).

    Returns:
        0 on success; 1 when a required value (API key, task app URL or app
        name) is missing; 2 when the Modal secret could not be provisioned.
    """
    from synth_ai.rl.secrets import mint_environment_api_key

    env = demo_core.load_env()
    cwd_env_path = os.path.join(os.getcwd(), ".env")
    local_env = demo_core.load_dotenv_file(cwd_env_path)

    synth_key = env.synth_api_key.strip()
    if not synth_key:
        synth_key = input("Enter SYNTH_API_KEY (required): ").strip()
        if not synth_key:
            print("SYNTH_API_KEY is required.")
            return 1
        demo_core.persist_api_key(synth_key)
        demo_core.persist_dotenv_values({"SYNTH_API_KEY": synth_key})

    env_key = env.env_api_key.strip()
    minted_env_key = False
    if not env_key:
        env_key = mint_environment_api_key()
        minted_env_key = True
        print("Minted new ENVIRONMENT_API_KEY")
    # NOTE(review): persisted on every run, not only when a key was minted —
    # confirm this matches the intended behaviour.
    demo_core.persist_env_api_key(env_key)
    demo_core.persist_dotenv_values({"ENVIRONMENT_API_KEY": env_key})

    task_url = env.task_app_base_url
    if not task_url:
        print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
        return 1

    app_name = env.task_app_name.strip()
    if not app_name:
        fallback = input("Enter Modal app name for the task app (required): ").strip()
        if not fallback:
            print("Task app name is required to configure Modal secrets.")
            return 1
        app_name = fallback
        demo_core.persist_task_url(task_url, name=app_name)

    secret_name = env.task_app_secret_name.strip() or f"{app_name}-secret"
    demo_core.persist_task_url(task_url, name=app_name)
    demo_core.persist_dotenv_values({
        "TASK_APP_BASE_URL": task_url,
        "TASK_APP_NAME": app_name,
        "TASK_APP_SECRET_NAME": secret_name,
    })

    # Ensure Modal secret has the environment API key (and optional extras).
    # NOTE(review): the secret values are passed as command-line arguments.
    secret_args = [f"ENVIRONMENT_API_KEY={env_key}"]
    openai_key = (os.environ.get("OPENAI_API_KEY") or local_env.get("OPENAI_API_KEY") or "").strip()
    if openai_key:
        secret_args.append(f"OPENAI_API_KEY={openai_key}")
    synth_for_secret = synth_key
    if synth_for_secret:
        secret_args.append(f"SYNTH_API_KEY={synth_for_secret}")

    create_cmd = ["uv", "run", "modal", "secret", "create", secret_name, *secret_args]
    code, out = _popen_capture(create_cmd)
    if code != 0:
        print(out)
        print("Secret create failed; retrying with delete → create…")
        _popen_capture(["uv", "run", "modal", "secret", "delete", secret_name])
        code, out = _popen_capture(create_cmd)
        if code != 0:
            print(out)
            print("Failed to provision Modal secret.")
            return 2

    # Verify task app can read the secret by hitting rollout health with X-API-Key.
    rollout_url = task_url.rstrip("/") + "/health/rollout"
    rc, body = _http("GET", rollout_url, headers={"X-API-Key": env_key})
    if rc != 200:
        # A failed probe is only a warning; configuration continues.
        print(f"Warning: rollout health check failed ({rc}). Response: {body}")
    else:
        print("Task app rollout health check OK.")

    env.synth_api_key = synth_key
    env.env_api_key = env_key
    env.task_app_name = app_name
    env.task_app_secret_name = secret_name

    # Prepare a baseline TOML (formerly `prepare`): prompt and write demo_config.toml
    defaults = [
        os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
    ]
    # Developer-machine path; only offered when it exists.
    mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
    if os.path.isfile(mono):
        defaults.append(mono)
    print("Select a baseline TOML:")
    for i, p in enumerate(defaults, 1):
        print(f" [{i}] {p}")
    choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
    try:
        # Clamp out-of-range selections to the nearest valid entry.
        idx = max(1, min(int(choice), len(defaults))) - 1
    except Exception:
        idx = 0
    base_path = defaults[idx]
    with open(base_path, "r") as fh:
        text = fh.read()

    current_gpu_type, current_gpu_count, current_model, current_tp = _configure_toml_defaults(text)

    # Prompts with defaults shown; Enter keeps current
    def _prompt(label: str, default_val: str) -> str:
        entered = input(f"{label} [{default_val}]: ").strip()
        return entered or default_val

    def _prompt_int(label: str, default_val: str) -> int:
        # Non-numeric input falls back to the default instead of crashing.
        entered = _prompt(label, default_val)
        try:
            return int(entered)
        except ValueError:
            print(f"Invalid value {entered!r} for {label}; keeping {default_val}.")
            return int(default_val)

    gpu_type = _prompt("GPU type", current_gpu_type)
    if ":" in gpu_type:
        # Accept "TYPE:COUNT" at the type prompt: split it so the type is not
        # written with a doubled ":COUNT" suffix, and offer the count as the
        # default for the next prompt.
        gpu_type, _, count_hint = gpu_type.partition(":")
        gpu_type = gpu_type.strip() or current_gpu_type
        if count_hint.strip().isdigit():
            current_gpu_count = count_hint.strip()
    gpu_count = _prompt_int("GPU count", current_gpu_count)
    model = _prompt("Model", current_model)
    tp = _prompt_int("Tensor parallel", current_tp)

    text = _configure_apply_overrides(text, gpu_type, gpu_count, model, tp)
    out_path = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
    _write_text(out_path, text)
    print(f"Prepared config at: {out_path}")
    here_cfg = os.path.abspath(out_path)
    print("Config path:", here_cfg)
    print("Environment (masked):")
    # Only the first six characters of each key are shown.
    print(json.dumps({
        "DEV_BACKEND_URL": env.dev_backend_url,
        "SYNTH_API_KEY": (synth_key[:6] + "…") if synth_key else "",
        "ENVIRONMENT_API_KEY": (env_key[:6] + "…") if env_key else "",
        "TASK_APP_BASE_URL": task_url,
        "TASK_APP_NAME": app_name,
        "TASK_APP_SECRET_NAME": secret_name,
    }, indent=2))
    if minted_env_key:
        print(f"Stored minted ENVIRONMENT_API_KEY in {cwd_env_path}")
    print("Next: uvx synth-ai rl_demo run")
    return 0
414
+
415
+
416
def _http(method: str, url: str, headers: Dict[str, str] | None = None, body: Dict[str, Any] | None = None) -> tuple[int, Dict[str, Any] | str]:
    """Issue an HTTP request and return ``(status_code, payload)``.

    Args:
        method: HTTP method name.
        url: Absolute URL to request.
        headers: Optional request headers.
        body: Optional JSON-serialisable mapping sent as the UTF-8 request body.

    Returns:
        The status code and the response body, parsed as JSON when possible
        and otherwise returned as text. HTTP error responses (4xx/5xx) are
        returned the same way. Any other failure — including a URL that
        cannot be turned into a request — yields ``(0, error_message)``.
    """
    import json as _json
    import urllib.error
    import urllib.request

    def _decode(raw: bytes) -> Any:
        # Prefer JSON; fall back to the decoded text.
        txt = raw.decode("utf-8", errors="ignore")
        try:
            return _json.loads(txt)
        except Exception:
            return txt

    data = None
    if body is not None:
        data = _json.dumps(body).encode("utf-8")
    try:
        # Build the request inside the try: Request() raises ValueError for a
        # URL without a scheme, which must be reported as (0, message) like
        # every other failure rather than propagate to the caller.
        req = urllib.request.Request(url, method=method, headers=headers or {}, data=data)
        with urllib.request.urlopen(req, timeout=60) as resp:
            code = getattr(resp, "status", 200)
            return int(code), _decode(resp.read())
    except urllib.error.HTTPError as he:  # Capture 4xx/5xx bodies
        return int(he.code or 0), _decode(he.read())
    except Exception as e:
        return 0, str(e)
438
+
439
+
440
def _write_text(path: str, content: str) -> None:
    """Write *content* to *path*, creating missing parent directories.

    A bare filename (no directory component) is written relative to the
    current working directory.
    """
    parent = os.path.dirname(path)
    # dirname is "" for a bare filename, and os.makedirs("") raises
    # FileNotFoundError — only create directories when there is one to create.
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(path, "w") as fh:
        fh.write(content)
444
+
445
+
446
+ # Note: `prepare` command has been removed; configuration now prepares TOML
447
+
448
+
449
+ def cmd_run(args: argparse.Namespace) -> int:
450
+ env = demo_core.load_env()
451
+ # Prompt for missing SYNTH_API_KEY
452
+ if not env.synth_api_key:
453
+ entered = input("Enter SYNTH_API_KEY (required): ").strip()
454
+ if not entered:
455
+ print("SYNTH_API_KEY is required.")
456
+ return 1
457
+ os.environ["SYNTH_API_KEY"] = entered
458
+ demo_core.persist_api_key(entered)
459
+ demo_core.persist_dotenv_values({"SYNTH_API_KEY": entered})
460
+ # Re-resolve env after potential persist
461
+ env = demo_core.load_env()
462
+ if not env.task_app_base_url:
463
+ print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
464
+ return 1
465
+ if not env.dev_backend_url:
466
+ print("Backend URL missing. Set DEV_BACKEND_URL in a .env or rely on default prod.")
467
+ return 1
468
+ if not env.env_api_key:
469
+ print("ENVIRONMENT_API_KEY missing. Run: uvx synth-ai rl_demo configure")
470
+ return 1
471
+ os.environ["ENVIRONMENT_API_KEY"] = env.env_api_key
472
+
473
+ # Detect monorepo launcher and delegate if available (aligns with run_clustered.sh which works)
474
+ launcher = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/start_math_clustered.py"
475
+ if os.path.isfile(launcher):
476
+ backend_base = env.dev_backend_url[:-4] if env.dev_backend_url.endswith("/api") else env.dev_backend_url
477
+ run_env = os.environ.copy()
478
+ run_env["BACKEND_URL"] = backend_base
479
+ run_env["SYNTH_API_KEY"] = env.synth_api_key
480
+ run_env["TASK_APP_BASE_URL"] = env.task_app_base_url
481
+ run_env["ENVIRONMENT_API_KEY"] = env.env_api_key
482
+ # Optional: TRAINER_START_URL passthrough if already set in environment
483
+ run_env["TRAINER_START_URL"] = run_env.get("TRAINER_START_URL", "")
484
+ # Forward convenience knobs
485
+ if args.batch_size is not None:
486
+ run_env["RL_BATCH_SIZE"] = str(int(args.batch_size))
487
+ if args.group_size is not None:
488
+ run_env["RL_GROUP_SIZE"] = str(int(args.group_size))
489
+ if args.model:
490
+ run_env["RL_MODEL"] = args.model
491
+ cmd = ["uv", "run", "python", launcher]
492
+ print(f"Launching monorepo clustered runner: {' '.join(cmd)}")
493
+ code = _popen_stream(cmd, env=run_env)
494
+ if code != 0:
495
+ print(f"Clustered runner exited with code {code}")
496
+ return code
497
+
498
+ # Fallback: legacy jobs API flow
499
+ import tomllib
500
+ # Determine config path: --config overrides; otherwise prompt from detected candidates
501
+ cfg_path = None
502
+ if getattr(args, "config", None):
503
+ cfg_path = os.path.abspath(args.config)
504
+ if not os.path.isfile(cfg_path):
505
+ print(f"Config not found: {cfg_path}")
506
+ return 1
507
+ else:
508
+ candidates: list[str] = []
509
+ # Prepared in CWD and home
510
+ cwd_prepared = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
511
+ home_prepared = os.path.expanduser("~/.synth-ai/demo_config.toml")
512
+ if os.path.isfile(cwd_prepared):
513
+ candidates.append(cwd_prepared)
514
+ if os.path.isfile(home_prepared):
515
+ candidates.append(home_prepared)
516
+ # Monorepo math_online.toml if present
517
+ mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
518
+ if os.path.isfile(mono):
519
+ candidates.append(mono)
520
+ # Packaged default
521
+ packaged = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml"))
522
+ candidates.append(packaged)
523
+ # Deduplicate while preserving order
524
+ seen = set()
525
+ uniq: list[str] = []
526
+ for p in candidates:
527
+ if p not in seen:
528
+ seen.add(p)
529
+ uniq.append(p)
530
+ print("Choose a TOML config:")
531
+ for i, p in enumerate(uniq, 1):
532
+ print(f" [{i}] {p}")
533
+ sel = input(f"Enter choice [1-{len(uniq)}] (default 1): ").strip() or "1"
534
+ try:
535
+ idx = max(1, min(int(sel), len(uniq))) - 1
536
+ except Exception:
537
+ idx = 0
538
+ cfg_path = uniq[idx]
539
+ with open(cfg_path, "rb") as fh:
540
+ inline_cfg = tomllib.load(fh)
541
+ with open(cfg_path, "r") as fh2:
542
+ toml_text = fh2.read()
543
+ if args.batch_size is not None:
544
+ inline_cfg.setdefault("training", {})["batch_size"] = int(args.batch_size)
545
+ if args.group_size is not None:
546
+ inline_cfg.setdefault("training", {})["group_size"] = int(args.group_size)
547
+ model_name = args.model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
548
+ api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
549
+ data_fragment: Dict[str, Any] = {
550
+ "model": model_name,
551
+ "endpoint_base_url": env.task_app_base_url,
552
+ "config": inline_cfg,
553
+ "config_toml": toml_text,
554
+ "config_source": "toml_inline",
555
+ "metadata": {"source": "synth-ai rl_demo", "cwd": os.getcwd()},
556
+ }
557
+ if env.env_api_key:
558
+ data_fragment["environment_api_key"] = env.env_api_key
559
+ for k in ("training", "evaluation", "rollout", "topology", "vllm"):
560
+ if isinstance(inline_cfg.get(k), dict):
561
+ data_fragment[k] = inline_cfg[k]
562
+ compute = {}
563
+ if isinstance(inline_cfg.get("compute"), dict):
564
+ if inline_cfg["compute"].get("gpu_type"):
565
+ compute["gpu_type"] = str(inline_cfg["compute"]["gpu_type"]).upper()
566
+ if inline_cfg["compute"].get("gpu_count"):
567
+ compute["gpu_count"] = int(inline_cfg["compute"]["gpu_count"])
568
+ if not compute:
569
+ topo = inline_cfg.get("topology") or {}
570
+ gshape = str(topo.get("gpu_type") or "")
571
+ if ":" in gshape:
572
+ t, c = gshape.split(":", 1)
573
+ compute = {"gpu_type": t.upper(), "gpu_count": int(c)}
574
+ body: Dict[str, Any] = {
575
+ "job_type": "rl",
576
+ "data": data_fragment,
577
+ }
578
+ if compute:
579
+ body["compute"] = compute
580
+ code, js = _http("POST", api + "/rl/jobs", headers={
581
+ "Content-Type": "application/json",
582
+ "Authorization": f"Bearer {env.synth_api_key}",
583
+ }, body=body)
584
+ if code not in (200, 201) or not isinstance(js, dict):
585
+ print("Job create failed:", code)
586
+ try:
587
+ if isinstance(js, dict):
588
+ print(json.dumps(js, indent=2))
589
+ else:
590
+ print(str(js))
591
+ except Exception:
592
+ print(str(js))
593
+ print("Request body was:\n" + json.dumps(body, indent=2))
594
+ return 2
595
+ job_id = js.get("job_id") or js.get("id") or ""
596
+ if not job_id:
597
+ print("Job id missing in response:", js)
598
+ print("Request body was:\n" + json.dumps(body, indent=2))
599
+ return 2
600
+ print("JOB_ID:", job_id)
601
+ _http("POST", api + f"/rl/jobs/{job_id}/start", headers={"Authorization": f"Bearer {env.synth_api_key}"})
602
+ since = 0
603
+ terminal = {"succeeded", "failed", "cancelled", "error", "completed"}
604
+ last_status = ""
605
+ start_t = time.time()
606
+ while True:
607
+ sc, sj = _http("GET", api + f"/learning/jobs/{job_id}")
608
+ status = (sj.get("status") if isinstance(sj, dict) else "") if sc == 200 else ""
609
+ if status and status != last_status:
610
+ last_status = status
611
+ print("status →", status)
612
+ if status and status.lower() in terminal:
613
+ print("FINAL:", status)
614
+ break
615
+ ec, ej = _http("GET", api + f"/orchestration/jobs/{job_id}/events?since_seq={since}&limit=200")
616
+ if ec == 200 and isinstance(ej, dict):
617
+ events = ej.get("events") or ej.get("data") or []
618
+ for e in events:
619
+ seq = int(e.get("seq") or 0)
620
+ if seq <= since:
621
+ continue
622
+ since = seq
623
+ typ = str(e.get("type") or e.get("event_type") or "").lower()
624
+ msg = e.get("message") or e.get("msg") or ""
625
+ if typ in ("rl.eval.started", "rl.eval.summary", "rl.train.step", "rl.metrics", "rl.performance.metrics"):
626
+ print(f"[{seq}] {typ}: {msg}")
627
+ mc, mj = _http("GET", api + f"/learning/jobs/{job_id}/metrics?after_step=-1&limit=50")
628
+ if mc == 200 and isinstance(mj, dict):
629
+ pts = mj.get("points") or []
630
+ for p in pts:
631
+ name = p.get("name")
632
+ if name == "eval.reward_mean":
633
+ print(f"metric eval.reward_mean step={p.get('step')} value={p.get('value')}")
634
+ break
635
+ if time.time() - start_t > (args.timeout or 600):
636
+ print("Timeout waiting for terminal state.")
637
+ break
638
+ time.sleep(2)
639
+ return 0
640
+
641
+
642
def main(argv: list[str] | None = None) -> int:
    """Parse the ``synth-ai`` demo command line and dispatch to a handler.

    Every subcommand is registered under two spellings (``rl_demo.<x>`` and
    ``demo.<x>``); both spellings get an identically configured subparser.

    Args:
        argv: Argument list to parse. ``None`` is passed straight through to
            ``ArgumentParser.parse_args``.

    Returns:
        ``1`` after printing the help text when no subcommand was selected;
        otherwise the selected handler's result coerced to ``int``, with a
        falsy result reported as ``0``.
    """
    root = argparse.ArgumentParser(prog="synth-ai")
    commands = root.add_subparsers(dest="cmd")

    def _register(aliases: list[str], *, configure: Callable[[argparse.ArgumentParser], None]) -> None:
        # One independent subparser per spelling, each set up the same way.
        for alias in aliases:
            configure(commands.add_parser(alias))

    def _check_opts(parser):
        parser.set_defaults(func=cmd_check)

    # (prepare command removed)

    def _deploy_opts(parser):
        parser.add_argument("--local", action="store_true", help="Run local FastAPI instead of Modal deploy")
        parser.add_argument("--app", type=str, default=None, help="Path to Modal app.py for uv run modal deploy")
        parser.add_argument("--name", type=str, default="synth-math-demo", help="Modal app name")
        parser.add_argument("--script", type=str, default=None, help="Path to deploy_task_app.sh (optional legacy)")
        parser.set_defaults(func=cmd_deploy)

    def _configure_opts(parser):
        parser.set_defaults(func=cmd_configure)

    def _run_opts(parser):
        parser.add_argument("--config", type=str, default=None, help="Path to TOML config (skip prompt)")
        parser.add_argument("--batch-size", type=int, default=None)
        parser.add_argument("--group-size", type=int, default=None)
        parser.add_argument("--model", type=str, default=None)
        parser.add_argument("--timeout", type=int, default=600)
        parser.add_argument("--dry-run", action="store_true", help="Print request body and exit")
        parser.set_defaults(func=cmd_run)

    # Registration order is kept as-is because it determines the order in which
    # argparse lists the subcommands in its help output.
    _register(["rl_demo.check", "demo.check"], configure=_check_opts)
    _register(["rl_demo.deploy", "demo.deploy"], configure=_deploy_opts)
    _register(["rl_demo.configure", "demo.configure"], configure=_configure_opts)
    _register(["rl_demo.run", "demo.run"], configure=_run_opts)

    args = root.parse_args(argv)
    try:
        handler = args.func
    except AttributeError:
        # No subcommand given, so no subparser installed a ``func`` default.
        root.print_help()
        return 1
    outcome = handler(args)
    return int(outcome or 0)
682
+
683
+
684
# Script entry point: run the CLI and use its return value as the process exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1 @@
1
+ # Namespace for demo task apps (math, etc.)