synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.4.dev9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (111) hide show
  1. synth_ai/cli/__init__.py +6 -0
  2. synth_ai/cli/demo.py +68 -9
  3. synth_ai/cli/rl_demo.py +137 -0
  4. synth_ai/cli/root.py +65 -0
  5. synth_ai/demos/core/__init__.py +1 -0
  6. synth_ai/demos/core/cli.py +621 -0
  7. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  8. synth_ai/demos/demo_task_apps/core.py +374 -0
  9. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  10. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  11. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  12. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  13. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  14. synth_ai/environments/examples/bandit/__init__.py +33 -0
  15. synth_ai/environments/examples/bandit/engine.py +294 -0
  16. synth_ai/environments/examples/bandit/environment.py +194 -0
  17. synth_ai/environments/examples/bandit/taskset.py +200 -0
  18. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  82. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  83. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  84. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  85. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  86. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  87. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  88. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  89. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  90. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  91. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  92. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  93. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  94. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  96. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  97. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  98. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  99. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  100. synth_ai/environments/examples/red/units/__init__.py +1 -0
  101. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  102. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  103. synth_ai/environments/service/app.py +8 -0
  104. synth_ai/install_sqld.sh +40 -0
  105. synth_ai-0.2.4.dev9.dist-info/METADATA +91 -0
  106. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/RECORD +110 -11
  107. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/entry_points.txt +1 -0
  108. synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
  109. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/WHEEL +0 -0
  110. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/licenses/LICENSE +0 -0
  111. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.4.dev9.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,621 @@
1
+ from __future__ import annotations
2
+
3
+ import argparse
4
+ import json
5
+ import os
6
+ import sys
7
+ import time
8
+ from typing import Any, Dict, Callable
9
+
10
+ from synth_ai.demos.demo_task_apps import core as demo_core
11
+ from synth_ai.demos.demo_task_apps.core import DemoEnv
12
+
13
+
14
def cmd_check(_args: argparse.Namespace) -> int:
    """Check that the demo prerequisites are in place and print a status report.

    Steps, in order:
      1. Make sure a SYNTH_API_KEY is available, prompting for it when it is
         missing and persisting it via ``demo_core``.
      2. Report Modal auth status.
      3. If a task app name is known but the stored base URL looks malformed,
         ask the Modal CLI for the app URL and persist the result.
      4. Probe the backend ``/health`` endpoint and the task app.
      5. Print the installed ``uv`` version (or an install hint).

    Args:
        _args: Parsed CLI arguments (unused).

    Returns:
        0 when the backend health probe passed, Modal auth is OK and a
        SYNTH_API_KEY is present; 1 otherwise (including when the user declines
        to enter a key). Task app reachability is reported but does not affect
        the exit status.
    """
    import subprocess

    env = demo_core.load_env()
    cwd_env_path = os.path.join(os.getcwd(), ".env")
    local_env = demo_core.load_dotenv_file(cwd_env_path)

    def _refresh_env() -> None:
        # Re-read both views after something was persisted.
        nonlocal env, local_env
        env = demo_core.load_env()
        local_env = demo_core.load_dotenv_file(cwd_env_path)

    def _maybe_fix_task_url() -> None:
        # Only attempt a lookup when the app name is known.
        if not env.task_app_name:
            return
        current = env.task_app_base_url
        needs_lookup = (
            not current
            or not current.endswith(".run")
            or current.count(".") < 2
            or not current.startswith(("http://", "https://"))
        )
        if not needs_lookup:
            return
        code, out = _popen_capture([
            "uv",
            "run",
            "python",
            "-m",
            "modal",
            "app",
            "url",
            env.task_app_name,
        ])
        if code != 0 or not out:
            return
        # Take the first whitespace-separated token that looks like a URL.
        new_url = ""
        for token in out.split():
            if token.startswith(("http://", "https://")):
                new_url = token.strip().rstrip("/")
                break
        if new_url and new_url != current:
            print(f"Updating TASK_APP_BASE_URL from Modal CLI → {new_url}")
            demo_core.persist_task_url(new_url, name=env.task_app_name)
            dotenv_values = {
                "TASK_APP_BASE_URL": new_url,
                "TASK_APP_NAME": env.task_app_name,
                "TASK_APP_SECRET_NAME": env.task_app_secret_name or f"{env.task_app_name}-secret",
            }
            demo_core.persist_dotenv_values(dotenv_values)
            os.environ["TASK_APP_BASE_URL"] = new_url
            _refresh_env()

    synth_key = env.synth_api_key.strip()
    if not synth_key:
        print("SYNTH_API_KEY missing from environment/.env.")
        entered = input("Enter SYNTH_API_KEY (required): ").strip()
        if not entered:
            print("SYNTH_API_KEY is required.")
            return 1
        os.environ["SYNTH_API_KEY"] = entered
        demo_core.persist_api_key(entered)
        path = demo_core.persist_dotenv_values({"SYNTH_API_KEY": entered})
        print(f"Stored SYNTH_API_KEY in {path}")
        _refresh_env()
    elif not local_env.get("SYNTH_API_KEY"):
        # Key is known but not present in the CWD .env: copy it there.
        path = demo_core.persist_dotenv_values({"SYNTH_API_KEY": synth_key})
        print(f"Stored SYNTH_API_KEY in {path}")
        _refresh_env()

    modal_ok, modal_msg = demo_core.modal_auth_status()
    print(f"Modal auth: {'OK' if modal_ok else 'MISSING'} ({modal_msg})")

    _maybe_fix_task_url()

    ok_backend = False
    ok_task = False
    if env.dev_backend_url:
        api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
        ok_backend = demo_core.assert_http_ok(api + "/health", method="GET")
        print(f"Backend health: {'OK' if ok_backend else 'FAIL'} ({api}/health)")
    else:
        print("Backend URL missing; set DEV_BACKEND_URL.")
    if env.task_app_base_url:
        base = env.task_app_base_url.rstrip("/")
        ok_task = demo_core.assert_http_ok(base + "/health", method="GET") or \
            demo_core.assert_http_ok(base, method="GET")
        print(f"Task app: {'OK' if ok_task else 'UNREACHABLE'} ({env.task_app_base_url})")
    else:
        print("Task app URL not set; run: uvx synth-ai rl_demo deploy")

    # Flush the label before the child process writes directly to the shared
    # stdout; otherwise the buffered "uv: " can show up after the version.
    print("uv: ", end="", flush=True)
    try:
        subprocess.check_call(["uv", "--version"])
    except (OSError, subprocess.CalledProcessError):
        print("(uv not found; install with `pip install uv`)\n", flush=True)

    if not ok_backend or not modal_ok or not env.synth_api_key:
        return 1
    return 0
120
+
121
+
122
+ def _popen_capture(cmd: list[str], cwd: str | None = None, env: dict | None = None) -> tuple[int, str]:
123
+ import subprocess
124
+ try:
125
+ proc = subprocess.Popen(cmd, cwd=cwd, env=env, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, text=True)
126
+ out, _ = proc.communicate()
127
+ return int(proc.returncode or 0), out or ""
128
+ except Exception as e:
129
+ return 1, str(e)
130
+
131
+
132
def _first_http_url(text: str) -> str:
    """Return the first whitespace-separated http(s) token in *text* (no trailing "/"), or ""."""
    for token in text.split():
        if token.startswith(("http://", "https://")):
            return token.strip().rstrip("/")
    return ""


def cmd_deploy(args: argparse.Namespace) -> int:
    """Deploy the demo task app (locally or to Modal) and persist its URL.

    With ``args.local`` a local server process is spawned and polled for up to
    30 seconds on ``http://127.0.0.1:8080``. Otherwise a Modal app file is
    resolved (``args.app``, then auto-detect candidates, then a prompt) and
    deployed through the Modal CLI; ``args.script`` selects the legacy
    script-based deploy when no app file could be found.

    Args:
        args: Parsed CLI arguments; reads ``local``, ``app``, ``script`` and
            ``name``.

    Returns:
        0 on success, 1 when the user aborts the confirmation prompt, 2 when
        the URL cannot be determined or any error occurs (the error is printed).
    """
    env = demo_core.load_env()
    url = ""
    app_name = env.task_app_name or ""
    try:
        if args.local:
            print("Starting local Task App…")
            import subprocess

            server = subprocess.Popen(
                [sys.executable, "-c", "from synth_ai.demos.demo_task_apps.math.app import run; run()"],
                stdout=sys.stdout,
                stderr=sys.stderr,
            )
            target = "http://127.0.0.1:8080"
            app_name = ""
            for _ in range(30):
                if demo_core.assert_http_ok(target + "/health", method="GET") or demo_core.assert_http_ok(target, method="GET"):
                    url = target
                    break
                if server.poll() is not None:
                    # The child already exited; further polling cannot succeed.
                    break
                time.sleep(1)
            if not url and server.poll() is None:
                # Startup failed: do not leave an orphaned server behind.
                server.terminate()
        else:
            # Auto-detect app path if not supplied; prompt for name and confirmation.
            app_path = os.path.abspath(args.app) if args.app else None
            if not app_path or not os.path.isfile(app_path):
                candidates = [
                    os.path.abspath(os.path.join(os.getcwd(), "math_task_app.py")),
                    # NOTE(review): developer-machine absolute path shipped in the package.
                    "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_task_app.py",
                ]
                app_path = next((p for p in candidates if os.path.isfile(p)), None)
            if not app_path and args.script:
                # Legacy script fallback if user supplied --script explicitly
                from synth_ai.demos.demo_task_apps.math.deploy_modal import deploy as modal_deploy
                url = modal_deploy(script_path=args.script, env_api_key=env.env_api_key)
                if args.name:
                    app_name = args.name
            else:
                if not app_path:
                    entered = input("Path to Modal app.py (e.g., tests/applications/math/rl/math_task_app.py): ").strip()
                    if not entered:
                        raise FileNotFoundError("No app.py path provided and auto-detect failed")
                    app_path = os.path.abspath(entered)
                if not os.path.isfile(app_path):
                    raise FileNotFoundError(f"App file not found: {app_path}")
                suggested_name = args.name or f"synth-{os.path.splitext(os.path.basename(app_path))[0]}"
                name_in = input(f"Modal app name [{suggested_name}]: ").strip() or suggested_name
                app_name = name_in
                print("\nAbout to deploy with:")
                print(f" app.py: {app_path}")
                print(f" name: {name_in}")
                proceed = (input("Proceed? [Y/n]: ").strip().lower() or "y").startswith("y")
                if not proceed:
                    print("Aborted by user.")
                    return 1
                deploy_cmd = ["uv", "run", "python", "-m", "modal", "deploy", "--name", name_in, app_path]
                code, out = _popen_capture(deploy_cmd)
                print(out)
                if code != 0:
                    raise RuntimeError(f"modal deploy failed (exit {code})")
                url_cmd = ["uv", "run", "python", "-m", "modal", "app", "url", name_in]
                code2, out2 = _popen_capture(url_cmd)
                if code2 == 0:
                    url = _first_http_url(out2)
                if not url:
                    # Fall back to scanning the deploy output as well.
                    url = _first_http_url(out + "\n" + out2)
                if not url:
                    raise RuntimeError("Failed to resolve public URL from modal CLI output")
        if not url:
            print("Failed to determine Task App URL")
            return 2
        demo_core.persist_task_url(url, name=app_name or None)
        dotenv_values = {"TASK_APP_BASE_URL": url}
        if app_name:
            dotenv_values["TASK_APP_NAME"] = app_name
            dotenv_values["TASK_APP_SECRET_NAME"] = f"{app_name}-secret"
        dotenv_path = demo_core.persist_dotenv_values(dotenv_values)
        print(f"TASK_APP_BASE_URL={url}")
        if app_name:
            print(f"TASK_APP_NAME={app_name}")
        print("Export for this shell:")
        print(f" export TASK_APP_BASE_URL={url}")
        if app_name:
            print(f" export TASK_APP_NAME={app_name}")
            print(f" export TASK_APP_SECRET_NAME={app_name}-secret")
        print(f"Persisted to {dotenv_path}")
        print("Next: uvx synth-ai rl_demo configure")
        return 0
    except Exception as e:  # top-level CLI boundary: report and map to exit code
        print(f"Deploy error: {e}")
        return 2
224
+
225
+
226
def cmd_configure(args: argparse.Namespace) -> int:
    """Provision keys and the Modal secret, then write ``demo_config.toml``.

    Ensures SYNTH_API_KEY and ENVIRONMENT_API_KEY exist (prompting for the
    former, minting the latter), persists the task app URL/name/secret name,
    creates the Modal secret through the Modal CLI, probes the task app's
    ``/health/rollout`` endpoint, and finally prompts for GPU/model settings
    that are substituted into a baseline TOML written to the current directory.

    Args:
        args: Parsed CLI arguments (not read by this command).

    Returns:
        0 on success, 1 when required input is missing, 2 when the Modal
        secret cannot be provisioned.
    """
    import re

    from synth_ai.rl.secrets import mint_environment_api_key

    env = demo_core.load_env()
    cwd_env_path = os.path.join(os.getcwd(), ".env")
    local_env = demo_core.load_dotenv_file(cwd_env_path)

    synth_key = env.synth_api_key.strip()
    if not synth_key:
        synth_key = input("Enter SYNTH_API_KEY (required): ").strip()
        if not synth_key:
            print("SYNTH_API_KEY is required.")
            return 1
        demo_core.persist_api_key(synth_key)
        demo_core.persist_dotenv_values({"SYNTH_API_KEY": synth_key})

    env_key = env.env_api_key.strip()
    minted_env_key = False
    if not env_key:
        env_key = mint_environment_api_key()
        minted_env_key = True
        print("Minted new ENVIRONMENT_API_KEY")
        # NOTE(review): persisting only a freshly minted key mirrors how
        # SYNTH_API_KEY is handled above — confirm against the upstream file.
        demo_core.persist_env_api_key(env_key)
        demo_core.persist_dotenv_values({"ENVIRONMENT_API_KEY": env_key})

    task_url = env.task_app_base_url
    if not task_url:
        print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
        return 1

    app_name = env.task_app_name.strip()
    if not app_name:
        fallback = input("Enter Modal app name for the task app (required): ").strip()
        if not fallback:
            print("Task app name is required to configure Modal secrets.")
            return 1
        app_name = fallback

    secret_name = env.task_app_secret_name.strip() or f"{app_name}-secret"
    # One persist covers both the prompted and the pre-existing app name.
    demo_core.persist_task_url(task_url, name=app_name)
    demo_core.persist_dotenv_values({
        "TASK_APP_BASE_URL": task_url,
        "TASK_APP_NAME": app_name,
        "TASK_APP_SECRET_NAME": secret_name,
    })

    # Ensure Modal secret has the environment API key (and optional extras).
    secret_args = [f"ENVIRONMENT_API_KEY={env_key}"]
    openai_key = (os.environ.get("OPENAI_API_KEY") or local_env.get("OPENAI_API_KEY") or "").strip()
    if openai_key:
        secret_args.append(f"OPENAI_API_KEY={openai_key}")
    if synth_key:
        secret_args.append(f"SYNTH_API_KEY={synth_key}")

    create_cmd = ["uv", "run", "modal", "secret", "create", secret_name, *secret_args]
    code, out = _popen_capture(create_cmd)
    if code != 0:
        print(out)
        print("Secret create failed; retrying with delete → create…")
        _popen_capture(["uv", "run", "modal", "secret", "delete", secret_name])
        code, out = _popen_capture(create_cmd)
        if code != 0:
            print(out)
            print("Failed to provision Modal secret.")
            return 2

    # Verify task app can read the secret by hitting rollout health with X-API-Key.
    rollout_url = task_url.rstrip("/") + "/health/rollout"
    rc, body = _http("GET", rollout_url, headers={"X-API-Key": env_key})
    if rc != 200:
        print(f"Warning: rollout health check failed ({rc}). Response: {body}")
    else:
        print("Task app rollout health check OK.")

    env.synth_api_key = synth_key
    env.env_api_key = env_key
    env.task_app_name = app_name
    env.task_app_secret_name = secret_name

    # Prepare a baseline TOML (formerly `prepare`): prompt and write demo_config.toml
    defaults = [
        os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml")),
    ]
    # NOTE(review): developer-machine absolute path shipped in the package.
    mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
    if os.path.isfile(mono):
        defaults.append(mono)
    print("Select a baseline TOML:")
    for i, p in enumerate(defaults, 1):
        print(f" [{i}] {p}")
    choice = input(f"Enter choice [1-{len(defaults)}] (default 1): ").strip() or "1"
    try:
        idx = max(1, min(int(choice), len(defaults))) - 1
    except ValueError:
        idx = 0
    base_path = defaults[idx]
    with open(base_path, "r") as fh:
        text = fh.read()

    def _ask_int(prompt: str, default: int) -> int:
        # Blank input or a non-integer answer falls back to the default
        # instead of crashing later inside the substitutions.
        raw = input(prompt).strip()
        if not raw:
            return default
        try:
            return int(raw)
        except ValueError:
            print(f"Invalid number {raw!r}; using {default}.")
            return default

    def _set_line(pattern: str, line: str, src: str) -> str:
        # A function replacement keeps backslashes in user input literal.
        return re.sub(pattern, lambda _m: line, src)

    gpu_type = input("GPU type (e.g., A100): ").strip() or "A100"
    gpu_count = _ask_int("GPU count (e.g., 4): ", 4)
    model = input("Model (e.g., Qwen/Qwen3-0.6B): ").strip() or "Qwen/Qwen3-0.6B"
    tp = _ask_int("Tensor parallel (e.g., 2): ", 2)

    plain_gpu = f"gpu_type = \"{gpu_type}\""
    shaped_gpu = f"gpu_type = \"{gpu_type}:{gpu_count}\""

    def _gpu_line(m: "re.Match[str]") -> str:
        # Keep the "TYPE:COUNT" shape where the baseline uses it; a plain
        # substitution applied first would erase the count for good.
        return shaped_gpu if re.search(r":\d+\"$", m.group(0)) else plain_gpu

    text = re.sub(r"(?m)^gpu_type\s*=\s*\".*?\"$", _gpu_line, text)
    text = _set_line(r"(?m)^gpu_count\s*=\s*\d+$", f"gpu_count = {gpu_count}", text)
    text = _set_line(r"(?m)^name\s*=\s*\".*?\"$", f"name = \"{model}\"", text)
    text = _set_line(r"(?m)^tensor_parallel_size\s*=\s*\d+$", f"tensor_parallel_size = {tp}", text)

    out_path = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
    _write_text(out_path, text)
    print(f"Prepared config at: {out_path}")
    print("Config path:", out_path)
    print("Environment (masked):")
    print(json.dumps({
        "DEV_BACKEND_URL": env.dev_backend_url,
        "SYNTH_API_KEY": (synth_key[:6] + "…") if synth_key else "",
        "ENVIRONMENT_API_KEY": (env_key[:6] + "…") if env_key else "",
        "TASK_APP_BASE_URL": task_url,
        "TASK_APP_NAME": app_name,
        "TASK_APP_SECRET_NAME": secret_name,
    }, indent=2))
    if minted_env_key:
        print(f"Stored minted ENVIRONMENT_API_KEY in {cwd_env_path}")
    print("Next: uvx synth-ai rl_demo run")
    return 0
353
+
354
+
355
+ def _http(method: str, url: str, headers: Dict[str, str] | None = None, body: Dict[str, Any] | None = None) -> tuple[int, Dict[str, Any] | str]:
356
+ import urllib.request, urllib.error, json as _json
357
+ data = None
358
+ if body is not None:
359
+ data = _json.dumps(body).encode("utf-8")
360
+ req = urllib.request.Request(url, method=method, headers=headers or {}, data=data)
361
+ try:
362
+ with urllib.request.urlopen(req, timeout=60) as resp:
363
+ code = getattr(resp, "status", 200)
364
+ txt = resp.read().decode("utf-8", errors="ignore")
365
+ try:
366
+ return int(code), _json.loads(txt)
367
+ except Exception:
368
+ return int(code), txt
369
+ except urllib.error.HTTPError as he: # Capture 4xx/5xx bodies
370
+ txt = he.read().decode("utf-8", errors="ignore")
371
+ try:
372
+ return int(he.code or 0), _json.loads(txt)
373
+ except Exception:
374
+ return int(he.code or 0), txt
375
+ except Exception as e:
376
+ return 0, str(e)
377
+
378
+
379
+ def _write_text(path: str, content: str) -> None:
380
+ os.makedirs(os.path.dirname(path), exist_ok=True)
381
+ with open(path, "w") as fh:
382
+ fh.write(content)
383
+
384
+
385
+ # Note: `prepare` command has been removed; configuration now prepares TOML
386
+
387
+
388
+ def cmd_run(args: argparse.Namespace) -> int:
389
+ env = demo_core.load_env()
390
+ # Prompt for missing SYNTH_API_KEY
391
+ if not env.synth_api_key:
392
+ entered = input("Enter SYNTH_API_KEY (required): ").strip()
393
+ if not entered:
394
+ print("SYNTH_API_KEY is required.")
395
+ return 1
396
+ os.environ["SYNTH_API_KEY"] = entered
397
+ demo_core.persist_api_key(entered)
398
+ demo_core.persist_dotenv_values({"SYNTH_API_KEY": entered})
399
+ # Re-resolve env after potential persist
400
+ env = demo_core.load_env()
401
+ if not env.task_app_base_url:
402
+ print("Task app URL missing. Run: uvx synth-ai rl_demo deploy")
403
+ return 1
404
+ if not env.dev_backend_url:
405
+ print("Backend URL missing. Set DEV_BACKEND_URL in a .env or rely on default prod.")
406
+ return 1
407
+ if not env.env_api_key:
408
+ print("ENVIRONMENT_API_KEY missing. Run: uvx synth-ai rl_demo configure")
409
+ return 1
410
+ os.environ["ENVIRONMENT_API_KEY"] = env.env_api_key
411
+
412
+ # Detect monorepo launcher and delegate if available (aligns with run_clustered.sh which works)
413
+ launcher = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/start_math_clustered.py"
414
+ if os.path.isfile(launcher):
415
+ backend_base = env.dev_backend_url[:-4] if env.dev_backend_url.endswith("/api") else env.dev_backend_url
416
+ run_env = os.environ.copy()
417
+ run_env["BACKEND_URL"] = backend_base
418
+ run_env["SYNTH_API_KEY"] = env.synth_api_key
419
+ run_env["TASK_APP_BASE_URL"] = env.task_app_base_url
420
+ run_env["ENVIRONMENT_API_KEY"] = env.env_api_key
421
+ # Optional: TRAINER_START_URL passthrough if already set in environment
422
+ run_env["TRAINER_START_URL"] = run_env.get("TRAINER_START_URL", "")
423
+ # Forward convenience knobs
424
+ if args.batch_size is not None:
425
+ run_env["RL_BATCH_SIZE"] = str(int(args.batch_size))
426
+ if args.group_size is not None:
427
+ run_env["RL_GROUP_SIZE"] = str(int(args.group_size))
428
+ if args.model:
429
+ run_env["RL_MODEL"] = args.model
430
+ code, out = _popen_capture(["uv", "run", "python", launcher], env=run_env)
431
+ print(out)
432
+ return int(code or 0)
433
+
434
+ # Fallback: legacy jobs API flow
435
+ import tomllib
436
+ # Determine config path: --config overrides; otherwise prompt from detected candidates
437
+ cfg_path = None
438
+ if getattr(args, "config", None):
439
+ cfg_path = os.path.abspath(args.config)
440
+ if not os.path.isfile(cfg_path):
441
+ print(f"Config not found: {cfg_path}")
442
+ return 1
443
+ else:
444
+ candidates: list[str] = []
445
+ # Prepared in CWD and home
446
+ cwd_prepared = os.path.abspath(os.path.join(os.getcwd(), "demo_config.toml"))
447
+ home_prepared = os.path.expanduser("~/.synth-ai/demo_config.toml")
448
+ if os.path.isfile(cwd_prepared):
449
+ candidates.append(cwd_prepared)
450
+ if os.path.isfile(home_prepared):
451
+ candidates.append(home_prepared)
452
+ # Monorepo math_online.toml if present
453
+ mono = "/Users/joshpurtell/Documents/GitHub/monorepo/tests/applications/math/rl/math_online.toml"
454
+ if os.path.isfile(mono):
455
+ candidates.append(mono)
456
+ # Packaged default
457
+ packaged = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", "demo_task_apps", "math", "config.toml"))
458
+ candidates.append(packaged)
459
+ # Deduplicate while preserving order
460
+ seen = set()
461
+ uniq: list[str] = []
462
+ for p in candidates:
463
+ if p not in seen:
464
+ seen.add(p)
465
+ uniq.append(p)
466
+ print("Choose a TOML config:")
467
+ for i, p in enumerate(uniq, 1):
468
+ print(f" [{i}] {p}")
469
+ sel = input(f"Enter choice [1-{len(uniq)}] (default 1): ").strip() or "1"
470
+ try:
471
+ idx = max(1, min(int(sel), len(uniq))) - 1
472
+ except Exception:
473
+ idx = 0
474
+ cfg_path = uniq[idx]
475
+ with open(cfg_path, "rb") as fh:
476
+ inline_cfg = tomllib.load(fh)
477
+ with open(cfg_path, "r") as fh2:
478
+ toml_text = fh2.read()
479
+ if args.batch_size is not None:
480
+ inline_cfg.setdefault("training", {})["batch_size"] = int(args.batch_size)
481
+ if args.group_size is not None:
482
+ inline_cfg.setdefault("training", {})["group_size"] = int(args.group_size)
483
+ model_name = args.model or (inline_cfg.get("model", {}) or {}).get("name", "Qwen/Qwen3-0.6B")
484
+ api = env.dev_backend_url.rstrip("/") + ("" if env.dev_backend_url.endswith("/api") else "/api")
485
+ data_fragment: Dict[str, Any] = {
486
+ "model": model_name,
487
+ "endpoint_base_url": env.task_app_base_url,
488
+ "config": inline_cfg,
489
+ "config_toml": toml_text,
490
+ "config_source": "toml_inline",
491
+ "metadata": {"source": "synth-ai rl_demo", "cwd": os.getcwd()},
492
+ }
493
+ if env.env_api_key:
494
+ data_fragment["environment_api_key"] = env.env_api_key
495
+ for k in ("training", "evaluation", "rollout", "topology", "vllm"):
496
+ if isinstance(inline_cfg.get(k), dict):
497
+ data_fragment[k] = inline_cfg[k]
498
+ compute = {}
499
+ if isinstance(inline_cfg.get("compute"), dict):
500
+ if inline_cfg["compute"].get("gpu_type"):
501
+ compute["gpu_type"] = str(inline_cfg["compute"]["gpu_type"]).upper()
502
+ if inline_cfg["compute"].get("gpu_count"):
503
+ compute["gpu_count"] = int(inline_cfg["compute"]["gpu_count"])
504
+ if not compute:
505
+ topo = inline_cfg.get("topology") or {}
506
+ gshape = str(topo.get("gpu_type") or "")
507
+ if ":" in gshape:
508
+ t, c = gshape.split(":", 1)
509
+ compute = {"gpu_type": t.upper(), "gpu_count": int(c)}
510
+ body: Dict[str, Any] = {
511
+ "job_type": "rl",
512
+ "data": data_fragment,
513
+ }
514
+ if compute:
515
+ body["compute"] = compute
516
+ code, js = _http("POST", api + "/rl/jobs", headers={
517
+ "Content-Type": "application/json",
518
+ "Authorization": f"Bearer {env.synth_api_key}",
519
+ }, body=body)
520
+ if code not in (200, 201) or not isinstance(js, dict):
521
+ print("Job create failed:", code)
522
+ try:
523
+ if isinstance(js, dict):
524
+ print(json.dumps(js, indent=2))
525
+ else:
526
+ print(str(js))
527
+ except Exception:
528
+ print(str(js))
529
+ print("Request body was:\n" + json.dumps(body, indent=2))
530
+ return 2
531
+ job_id = js.get("job_id") or js.get("id") or ""
532
+ if not job_id:
533
+ print("Job id missing in response:", js)
534
+ print("Request body was:\n" + json.dumps(body, indent=2))
535
+ return 2
536
+ print("JOB_ID:", job_id)
537
+ _http("POST", api + f"/rl/jobs/{job_id}/start", headers={"Authorization": f"Bearer {env.synth_api_key}"})
538
+ since = 0
539
+ terminal = {"succeeded", "failed", "cancelled", "error", "completed"}
540
+ last_status = ""
541
+ start_t = time.time()
542
+ while True:
543
+ sc, sj = _http("GET", api + f"/learning/jobs/{job_id}")
544
+ status = (sj.get("status") if isinstance(sj, dict) else "") if sc == 200 else ""
545
+ if status and status != last_status:
546
+ last_status = status
547
+ print("status →", status)
548
+ if status and status.lower() in terminal:
549
+ print("FINAL:", status)
550
+ break
551
+ ec, ej = _http("GET", api + f"/orchestration/jobs/{job_id}/events?since_seq={since}&limit=200")
552
+ if ec == 200 and isinstance(ej, dict):
553
+ events = ej.get("events") or ej.get("data") or []
554
+ for e in events:
555
+ seq = int(e.get("seq") or 0)
556
+ if seq <= since:
557
+ continue
558
+ since = seq
559
+ typ = str(e.get("type") or e.get("event_type") or "").lower()
560
+ msg = e.get("message") or e.get("msg") or ""
561
+ if typ in ("rl.eval.started", "rl.eval.summary", "rl.train.step", "rl.metrics", "rl.performance.metrics"):
562
+ print(f"[{seq}] {typ}: {msg}")
563
+ mc, mj = _http("GET", api + f"/learning/jobs/{job_id}/metrics?after_step=-1&limit=50")
564
+ if mc == 200 and isinstance(mj, dict):
565
+ pts = mj.get("points") or []
566
+ for p in pts:
567
+ name = p.get("name")
568
+ if name == "eval.reward_mean":
569
+ print(f"metric eval.reward_mean step={p.get('step')} value={p.get('value')}")
570
+ break
571
+ if time.time() - start_t > (args.timeout or 600):
572
+ print("Timeout waiting for terminal state.")
573
+ break
574
+ time.sleep(2)
575
+ return 0
576
+
577
+
578
def main(argv: list[str] | None = None) -> int:
    """Build the ``synth-ai`` demo CLI, parse *argv*, and run the chosen subcommand.

    Every subcommand is registered under two spellings, ``rl_demo.<name>`` and
    ``demo.<name>``; both spellings share the same options and handler.

    Args:
        argv: Argument vector handed to ``ArgumentParser.parse_args``. ``None``
            is passed through unchanged.

    Returns:
        ``1`` (after printing help) when no subcommand handler was selected;
        otherwise the handler's result converted to ``int``, with any falsy
        result reported as ``0``.
    """
    root = argparse.ArgumentParser(prog="synth-ai")
    subcommands = root.add_subparsers(dest="cmd")

    def _setup_check(parser: argparse.ArgumentParser) -> None:
        parser.set_defaults(func=cmd_check)

    def _setup_deploy(parser: argparse.ArgumentParser) -> None:
        parser.add_argument(
            "--local",
            action="store_true",
            help="Run local FastAPI instead of Modal deploy",
        )
        parser.add_argument(
            "--app",
            type=str,
            default=None,
            help="Path to Modal app.py for uv run modal deploy",
        )
        parser.add_argument(
            "--name",
            type=str,
            default="synth-math-demo",
            help="Modal app name",
        )
        parser.add_argument(
            "--script",
            type=str,
            default=None,
            help="Path to deploy_task_app.sh (optional legacy)",
        )
        parser.set_defaults(func=cmd_deploy)

    def _setup_configure(parser: argparse.ArgumentParser) -> None:
        parser.set_defaults(func=cmd_configure)

    def _setup_run(parser: argparse.ArgumentParser) -> None:
        parser.add_argument(
            "--config",
            type=str,
            default=None,
            help="Path to TOML config (skip prompt)",
        )
        parser.add_argument("--batch-size", type=int, default=None)
        parser.add_argument("--group-size", type=int, default=None)
        parser.add_argument("--model", type=str, default=None)
        parser.add_argument("--timeout", type=int, default=600)
        parser.add_argument(
            "--dry-run",
            action="store_true",
            help="Print request body and exit",
        )
        parser.set_defaults(func=cmd_run)

    # Registration order is kept stable because it determines the order in
    # which subcommands appear in the generated help text.
    # (The former "prepare" command is intentionally absent.)
    registrations = (
        (("rl_demo.check", "demo.check"), _setup_check),
        (("rl_demo.deploy", "demo.deploy"), _setup_deploy),
        (("rl_demo.configure", "demo.configure"), _setup_configure),
        (("rl_demo.run", "demo.run"), _setup_run),
    )
    for aliases, setup in registrations:
        for alias in aliases:
            setup(subcommands.add_parser(alias))

    namespace = root.parse_args(argv)
    if hasattr(namespace, "func"):
        # Handlers may return None (or another falsy value) to signal success.
        return int(namespace.func(namespace) or 0)

    # No subcommand was given, so no handler was attached to the namespace.
    root.print_help()
    return 1
618
+
619
+
620
# Script entry point: run the CLI and use its return value as the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
@@ -0,0 +1 @@
1
+ # Namespace for demo task apps (math, etc.)