synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (112) hide show
  1. synth_ai/__init__.py +1 -1
  2. synth_ai/cli/__init__.py +6 -0
  3. synth_ai/cli/demo.py +68 -9
  4. synth_ai/cli/rl_demo.py +137 -0
  5. synth_ai/cli/root.py +65 -0
  6. synth_ai/demos/core/__init__.py +1 -0
  7. synth_ai/demos/core/cli.py +685 -0
  8. synth_ai/demos/demo_task_apps/__init__.py +1 -0
  9. synth_ai/demos/demo_task_apps/core.py +374 -0
  10. synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
  11. synth_ai/demos/demo_task_apps/math/app.py +37 -0
  12. synth_ai/demos/demo_task_apps/math/config.toml +44 -0
  13. synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
  14. synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
  15. synth_ai/environments/examples/bandit/__init__.py +33 -0
  16. synth_ai/environments/examples/bandit/engine.py +294 -0
  17. synth_ai/environments/examples/bandit/environment.py +194 -0
  18. synth_ai/environments/examples/bandit/taskset.py +200 -0
  19. synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
  20. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
  21. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
  22. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
  23. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
  24. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
  25. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
  26. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
  27. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
  28. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
  29. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
  30. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
  31. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
  32. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
  33. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
  34. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
  35. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
  36. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
  37. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
  38. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
  39. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
  40. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
  41. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
  42. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
  43. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
  44. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
  45. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
  46. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
  47. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
  48. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
  49. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
  50. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
  51. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
  52. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
  53. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
  54. synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
  55. synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
  56. synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
  57. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
  58. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
  59. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
  60. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
  61. synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
  62. synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
  63. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
  64. synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
  65. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
  66. synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
  67. synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
  68. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
  69. synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
  70. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
  71. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
  72. synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
  73. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
  74. synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
  75. synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
  76. synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
  77. synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
  78. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
  79. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
  80. synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
  81. synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
  82. synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
  83. synth_ai/environments/examples/crafter_classic/environment.py +41 -2
  84. synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
  85. synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
  86. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
  87. synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
  88. synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
  89. synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
  90. synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
  91. synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
  92. synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
  93. synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
  94. synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
  95. synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
  96. synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
  97. synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
  98. synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
  99. synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
  100. synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
  101. synth_ai/environments/examples/red/units/__init__.py +1 -0
  102. synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
  103. synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
  104. synth_ai/environments/service/app.py +8 -0
  105. synth_ai/install_sqld.sh +40 -0
  106. synth_ai-0.2.5.dist-info/METADATA +106 -0
  107. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/RECORD +111 -12
  108. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/entry_points.txt +1 -0
  109. synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
  110. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/WHEEL +0 -0
  111. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/licenses/LICENSE +0 -0
  112. {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,374 @@
1
+ from __future__ import annotations
2
+
3
+ import json
4
+ import os
5
+ import subprocess
6
+ import sys
7
+ import time
8
+ from dataclasses import dataclass
9
+ from typing import Any, Dict, Optional, Tuple
10
+
11
+ import urllib.request
12
+
13
+
14
@dataclass
class DemoEnv:
    """Resolved settings for the demo CLI; every field defaults to an empty string."""

    # Backend API base URL (``load_env`` stores it without a trailing slash).
    dev_backend_url: str = ""
    # Synth API key; ``run_job`` places it in the Authorization header it prints.
    synth_api_key: str = ""
    # Value resolved for ENVIRONMENT_API_KEY.
    env_api_key: str = ""
    # Value resolved for TASK_APP_BASE_URL (stored without a trailing slash).
    task_app_base_url: str = ""
    # TASK_APP_NAME as recorded in the persisted state file.
    task_app_name: str = ""
    # TASK_APP_SECRET_NAME as recorded in the persisted state file.
    task_app_secret_name: str = ""
22
+
23
+
24
+ def _mask(value: str, keep: int = 4) -> str:
25
+ if not value:
26
+ return ""
27
+ return value[:keep] + "…" if len(value) > keep else value
28
+
29
+
30
+ def _state_path() -> str:
31
+ return os.path.expanduser("~/.synth-ai/demo.json")
32
+
33
+
34
def _read_state() -> Dict[str, Any]:
    """Load the persisted demo state.

    Returns the JSON object stored in the state file, or an empty dict when
    the file is absent, unreadable, not valid JSON, or not a JSON object.
    """
    try:
        state_file = _state_path()
        if not os.path.isfile(state_file):
            return {}
        with open(state_file) as handle:
            loaded = json.load(handle) or {}
        if isinstance(loaded, dict):
            return loaded
        return {}
    except Exception:
        # Reading state is best-effort; any failure is treated as "no state".
        return {}
44
+
45
+
46
def _write_state(data: Dict[str, Any]) -> None:
    """Persist ``data`` as JSON to the demo state file (best effort).

    The state carries API keys (``SYNTH_API_KEY``, ``ENVIRONMENT_API_KEY``),
    so the file is created with owner-only permissions and the permissions
    of a pre-existing file are tightened as well. Any failure is swallowed
    so that CLI usage never crashes on persistence problems.
    """
    try:
        path = _state_path()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # Create with 0o600 so a new file is never readable by other users.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as fh:
            try:
                # The mode passed to os.open only applies to newly created
                # files; tighten an existing file before writing secrets.
                os.chmod(path, 0o600)
            except OSError:
                pass
            json.dump(data, fh)
    except Exception:
        # Best-effort; failure to persist shouldn't crash CLI usage.
        pass
54
+
55
+
56
def load_dotenv_file(path: str) -> Dict[str, str]:
    """Parse a ``.env``-style file into a dict.

    Blank lines, lines starting with ``#`` and lines without ``=`` are
    skipped. Keys and values are whitespace-stripped and surrounding quote
    characters are removed from values. A leading ``export`` keyword on a key
    (``export FOO=bar``) is dropped so the value is stored under ``FOO``.
    Lines with an empty key are ignored.

    Returns whatever was parsed before any read error; a missing or
    unreadable file yields an empty dict.
    """
    out: Dict[str, str] = {}
    try:
        with open(path) as fh:
            for raw in fh:
                line = raw.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                key, value = line.split("=", 1)
                key = key.strip()
                # Accept shell-style "export KEY=value" lines.
                parts = key.split(None, 1)
                if len(parts) == 2 and parts[0] == "export":
                    key = parts[1].strip()
                if not key:
                    continue
                out[key] = value.strip().strip('"').strip("'")
    except (OSError, ValueError):
        # Missing/unreadable/undecodable file: keep what was parsed so far.
        pass
    return out
69
+
70
+
71
+ def _persist_dotenv_values(path: str, values: Dict[str, str]) -> None:
72
+ """Ensure ``values`` are present in ``path`` (.env style)."""
73
+
74
+ try:
75
+ existing_lines: list[str] = []
76
+ if os.path.isfile(path):
77
+ with open(path) as fh:
78
+ existing_lines = fh.read().splitlines()
79
+ else:
80
+ os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
81
+ mapping: Dict[str, str] = {}
82
+ order: list[str] = []
83
+ for line in existing_lines:
84
+ if not line or line.startswith("#") or "=" not in line:
85
+ order.append(line)
86
+ continue
87
+ key, val = line.split("=", 1)
88
+ key = key.strip()
89
+ mapping[key] = val
90
+ order.append(key)
91
+ for key, value in values.items():
92
+ if key not in mapping:
93
+ order.append(key)
94
+ mapping[key] = value
95
+ with open(path, "w") as fh:
96
+ for item in order:
97
+ if item in mapping:
98
+ fh.write(f"{item}={mapping[item]}\n")
99
+ else:
100
+ fh.write(item + "\n")
101
+ for key, value in values.items():
102
+ if key not in order:
103
+ fh.write(f"{key}={value}\n")
104
+ except Exception:
105
+ # Best-effort; failure to persist shouldn't crash CLI usage.
106
+ pass
107
+
108
+
109
def persist_dotenv_values(values: Dict[str, str], *, cwd: str | None = None) -> str:
    """Store ``values`` in the ``.env`` file of ``cwd`` and return that file's path.

    When ``cwd`` is not given (or empty) the current working directory is used.
    """
    base_dir = cwd if cwd else os.getcwd()
    dotenv_path = os.path.join(base_dir, ".env")
    _persist_dotenv_values(dotenv_path, values)
    return dotenv_path
113
+
114
+
115
def persist_env_api_key(key: str) -> None:
    """Record ``key`` as ENVIRONMENT_API_KEY in the persisted demo state."""
    state = _read_state()
    state.update({"ENVIRONMENT_API_KEY": key})
    _write_state(state)
119
+
120
+
121
def modal_auth_status() -> Tuple[bool, str]:
    """Report whether Modal credentials are available.

    Returns ``(ok, message)``. Credentials come from the ``MODAL_TOKEN_ID`` /
    ``MODAL_TOKEN_SECRET`` environment variables first and from the Modal
    client configuration otherwise. When the Modal client cannot be imported
    the result is ``(False, "Modal client unavailable (...)")``.
    """
    env_token_id = (os.environ.get("MODAL_TOKEN_ID") or "").strip()
    env_token_secret = (os.environ.get("MODAL_TOKEN_SECRET") or "").strip()

    try:
        from modal.config import config as modal_config, user_config_path
    except Exception as exc:  # pragma: no cover - modal optional in some envs
        return False, f"Modal client unavailable ({exc})"

    token_id = env_token_id or str(modal_config.get("token_id") or "")
    token_secret = env_token_secret or str(modal_config.get("token_secret") or "")

    if token_id and token_secret:
        if env_token_id:
            source = "environment variables"
        else:
            profile = os.environ.get("MODAL_PROFILE") or "default"
            source = f"profile {profile}"
        return True, f"{source} ({_mask(token_id, keep=6)})"

    # A token id supplied via the environment without its secret gets a
    # dedicated message instead of the generic "missing" report.
    if env_token_id and not env_token_secret:
        return False, (
            "MODAL_TOKEN_ID is set but MODAL_TOKEN_SECRET is missing. Set both env vars "
            "or regenerate credentials via `modal token new`."
        )

    candidates = (("token_id", token_id), ("token_secret", token_secret))
    missing = [label for label, present in candidates if not present]

    config_path = user_config_path
    hint = "Run `modal setup` or `modal token new` to authenticate."
    if config_path and os.path.exists(config_path):
        hint += f" (config: {config_path})"

    missing_str = ", ".join(missing) or "credentials"
    return False, f"Missing Modal {missing_str}. {hint}"
164
+
165
+
166
def load_env() -> DemoEnv:
    """Resolve the demo environment with defaults and auto-detection.

    Backend URL:
      - Use BACKEND_OVERRIDE (any value) from the CWD .env if set
      - Else use DEV_BACKEND_URL from the CWD .env ONLY if it contains
        localhost/127.0.0.1 or ends with :8000
      - Else default to prod https://agent-learning.onrender.com/api
      - The result always ends in ``/api`` exactly once; a trailing slash
        (``.../api/``) no longer produces ``.../api/api``.

    API keys:
      - SYNTH_API_KEY from OS -> CWD .env -> repo .env -> pkg demo .env -> state
      - If still missing, pick PROD_SYNTH_API_KEY (prod backend) or
        DEV_SYNTH_API_KEY / TESTING_LOCAL_SYNTH_API_KEY (otherwise) and
        persist the choice to the state file
      - ENVIRONMENT_API_KEY additionally consults examples/rl/.env

    TASK_APP_BASE_URL:
      - OS -> CWD .env -> repo .env -> pkg demo .env -> state

    Prints a summary (keys masked) and returns the populated ``DemoEnv``.
    """

    def _first(key: str, *sources: Dict[str, Any]) -> str:
        # First truthy value for ``key`` across ``sources``, else "".
        for source in sources:
            found = source.get(key)
            if found:
                return str(found)
        return ""

    prod_url = "https://agent-learning.onrender.com/api"
    env = DemoEnv()

    os_env: Dict[str, str] = dict(os.environ)

    # CWD .env
    cwd_env = load_dotenv_file(os.path.join(os.getcwd(), ".env"))

    # Repo/package .envs (fallbacks)
    repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
    repo_env = load_dotenv_file(os.path.join(repo_root, ".env"))
    pkg_env = load_dotenv_file(os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env"))
    examples_env = load_dotenv_file(os.path.join(repo_root, "examples", "rl", ".env"))

    state = _read_state()
    env_files = (os_env, cwd_env, repo_env, pkg_env)

    # Backend URL resolution
    backend_override = (cwd_env.get("BACKEND_OVERRIDE") or "").strip()
    dev_env = (cwd_env.get("DEV_BACKEND_URL") or "").strip()
    lower = dev_env.lower()
    if backend_override:
        dev_url = backend_override
    elif dev_env and ("localhost" in lower or "127.0.0.1" in lower or lower.endswith(":8000")):
        dev_url = dev_env
    else:
        dev_url = prod_url
    # Strip the trailing slash BEFORE checking the suffix so ".../api/" is
    # recognised as already ending in "/api".
    dev_url = dev_url.rstrip("/")
    if not dev_url.endswith("/api"):
        dev_url += "/api"

    # API key selection
    synth_api_key = _first("SYNTH_API_KEY", *env_files, state)
    if not synth_api_key:
        if "agent-learning.onrender.com" in dev_url:
            synth_api_key = _first("PROD_SYNTH_API_KEY", *env_files)
        else:
            synth_api_key = (
                _first("DEV_SYNTH_API_KEY", *env_files)
                or _first("TESTING_LOCAL_SYNTH_API_KEY", *env_files)
            )
        if synth_api_key:
            # Remember the auto-picked key for later invocations.
            st = dict(state)
            st["SYNTH_API_KEY"] = synth_api_key
            _write_state(st)

    env_api_key = _first("ENVIRONMENT_API_KEY", *env_files, examples_env, state)

    # Task app URL
    task_url = _first("TASK_APP_BASE_URL", *env_files, state)

    task_app_name = str(state.get("TASK_APP_NAME") or "")
    task_app_secret_name = str(state.get("TASK_APP_SECRET_NAME") or "")

    env.dev_backend_url = dev_url.rstrip("/")
    env.synth_api_key = synth_api_key
    env.env_api_key = env_api_key
    env.task_app_base_url = task_url.rstrip("/")
    env.task_app_name = task_app_name
    env.task_app_secret_name = task_app_secret_name

    print("ENV:")
    print(f"  DEV_BACKEND_URL={env.dev_backend_url}")
    print(f"  SYNTH_API_KEY={_mask(env.synth_api_key)}")
    print(f"  ENVIRONMENT_API_KEY={_mask(env.env_api_key)}")
    print(f"  TASK_APP_BASE_URL={env.task_app_base_url}")
    if task_app_name:
        print(f"  TASK_APP_NAME={task_app_name}")
    if task_app_secret_name:
        print(f"  TASK_APP_SECRET_NAME={task_app_secret_name}")
    return env
289
+
290
+
291
def assert_http_ok(url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0) -> bool:
    """Return True when a request to ``url`` finishes with a 2xx/3xx status.

    Any exception (invalid URL, connection failure, HTTP error raised by
    urllib, timeout) results in False. ``allow_redirects`` is accepted but
    not consulted by this implementation.
    """
    try:
        request = urllib.request.Request(url, method=method)
        response = urllib.request.urlopen(request, timeout=timeout)  # nosec - controlled URL
        with response:
            status = int(getattr(response, "status", 200))
    except Exception:
        return False
    return 200 <= status < 400
299
+
300
+
301
def deploy_modal_math(env: DemoEnv) -> str:
    """Deploy the Math Task App to Modal via the deploy script; return its public URL.

    The script colocated under ``math/`` next to this file is preferred, with
    ``examples/rl/deploy_task_app.sh`` under the repo root as a fallback. The
    script runs with ``ENVIRONMENT_API_KEY`` taken from ``env`` when set.
    Afterwards the ``.last_deploy*.log`` files next to the script are scanned
    for the first http(s) URL containing ``modal.run``.

    Raises:
        RuntimeError: if no deploy script exists or no URL is found in the logs.
        subprocess.CalledProcessError: if the deploy script exits non-zero.
    """
    import re

    # Prefer the script colocated under demo_task_apps/math relative to this file
    this_dir = os.path.dirname(__file__)
    demo_script = os.path.join(this_dir, "math", "deploy_task_app.sh")
    # Fallback to top-level examples path if needed (repo root heuristic)
    repo_root = os.path.abspath(os.path.join(this_dir, "../../.."))
    fallback_script = os.path.join(repo_root, "examples", "rl", "deploy_task_app.sh")
    script = demo_script if os.path.isfile(demo_script) else fallback_script
    if not os.path.isfile(script):
        raise RuntimeError(f"deploy_task_app.sh not found at {demo_script} or {fallback_script}")

    envp = os.environ.copy()
    if env.env_api_key:
        envp["ENVIRONMENT_API_KEY"] = env.env_api_key
    print(f"Deploying Math Task App to Modal using: {script}")
    script_dir = os.path.dirname(script)
    subprocess.check_call(["bash", script], cwd=script_dir, env=envp)

    # Match a real URL rather than taking the last token of any line that
    # mentions "modal.run": hint text such as "(…modal.run)." or words that
    # follow the URL must not be returned as the URL.
    url_pattern = re.compile(r"https?://[^\s\"'<>]*modal\.run[^\s\"'<>]*")
    for candidate in (".last_deploy.log", ".last_deploy.dev.log", ".last_deploy.manual.log"):
        try:
            with open(os.path.join(script_dir, candidate)) as fh:
                for line in fh:
                    found = url_pattern.search(line)
                    if found:
                        return found.group(0).rstrip(".,;)").rstrip("/")
        except (OSError, UnicodeDecodeError):
            # Missing or unreadable log: try the next candidate.
            continue
    raise RuntimeError("Failed to extract Modal Task App URL from deploy logs")
330
+
331
+
332
def persist_task_url(url: str, *, name: str | None = None) -> None:
    """Save the task app URL (and optionally its name) to the demo state.

    When ``name`` is given, ``TASK_APP_SECRET_NAME`` is derived as
    ``"<name>-secret"``. The state is always written back; a summary is
    printed only if something changed. A changed secret name is listed in the
    summary only when the app name itself did not change.
    """
    state = _read_state()
    updated: list[str] = []

    if state.get("TASK_APP_BASE_URL") != url:
        state["TASK_APP_BASE_URL"] = url
        updated.append("TASK_APP_BASE_URL")

    if name:
        name_changed = state.get("TASK_APP_NAME") != name
        if name_changed:
            state["TASK_APP_NAME"] = name
            updated.append("TASK_APP_NAME")
        derived_secret = f"{name}-secret"
        if state.get("TASK_APP_SECRET_NAME") != derived_secret:
            state["TASK_APP_SECRET_NAME"] = derived_secret
            if not name_changed:
                updated.append("TASK_APP_SECRET_NAME")

    _write_state(state)

    if not updated:
        return
    print(f"Saved {', '.join(updated)} to {_state_path()}")
    if {"TASK_APP_NAME", "TASK_APP_SECRET_NAME"} & set(updated):
        print(f"TASK_APP_SECRET_NAME={state.get('TASK_APP_SECRET_NAME', '')}")
352
+
353
+
354
def persist_api_key(key: str) -> None:
    """Record ``key`` as SYNTH_API_KEY in the persisted demo state."""
    state = _read_state()
    state.update({"SYNTH_API_KEY": key})
    _write_state(state)
358
+
359
+
360
def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] = None, group_size: Optional[int] = None, model: Optional[str] = None) -> None:
    """Print the start of a cURL command that would create an RL job (placeholder).

    Only ``env.dev_backend_url`` and ``env.synth_api_key`` are used; the
    remaining parameters are accepted but not consulted by this scaffold.
    The printed command embeds the API key as-is and its JSON body is
    deliberately left unfinished.
    """
    api_base = env.dev_backend_url.rstrip("/")
    if not api_base.endswith("/api"):
        api_base = api_base + "/api"

    command_parts = [
        "curl -s -X POST \"" + api_base + "/rl/jobs\" ",
        "-H 'Content-Type: application/json' ",
        f"-H 'Authorization: Bearer {env.synth_api_key}' ",
        "-d '{",  # intentionally not fully formed here for brevity in this scaffold
    ]
    print("\nTo create an RL job, run:")
    print("".join(command_parts))
    print("  NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events.")
@@ -0,0 +1 @@
1
+ # Package namespace for Math demo task app
@@ -0,0 +1,37 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ from fastapi import FastAPI
5
+ from starlette.middleware.cors import CORSMiddleware
6
+
7
+ # Reuse the examples/rl task_app routes if available
8
+ try:
9
+ from synth_ai.examples.rl.task_app import make_app as make_rl_app # type: ignore
10
+ except Exception: # fallback path when imported from repo root
11
+ try:
12
+ from examples.rl.task_app import make_app as make_rl_app # type: ignore
13
+ except Exception as e: # pragma: no cover
14
+ raise ImportError(f"Unable to import RL task app: {e}")
15
+
16
+
17
def create_app() -> FastAPI:
    """Build the RL task app configured for the math demo, with permissive CORS.

    ``DEMO_ENV_NAME`` and ``DEMO_POLICY_NAME`` are given math defaults unless
    already present in the environment.
    """
    # Configure math defaults via env (consumed by RL task_app helpers)
    math_defaults = (("DEMO_ENV_NAME", "math"), ("DEMO_POLICY_NAME", "math-react"))
    for variable, default in math_defaults:
        os.environ.setdefault(variable, default)

    app = make_rl_app()

    # CORS for local demo.
    # NOTE(review): wildcard origins combined with credentials is very
    # permissive — confirm this app is never exposed beyond local/demo use.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    app.add_middleware(CORSMiddleware, **cors_options)
    return app
32
+
33
+
34
def run(host: str = "127.0.0.1", port: int = 8080):
    """Serve the math demo app with uvicorn.

    A ``PORT`` environment variable, when set, takes precedence over ``port``.
    """
    import uvicorn

    application = create_app()
    bind_port = int(os.getenv("PORT", port))
    uvicorn.run(application, host=host, port=bind_port)
@@ -0,0 +1,44 @@
1
+ [model]
2
+ name = "Qwen/Qwen3-0.6B"
3
+ dtype = "bfloat16"
4
+ seed = 42
5
+
6
+ [reference]
7
+ placement = "dedicated"
8
+ port = 8002
9
+
10
+ [topology]
11
+ type = "single_node_split"
12
+ gpus_for_vllm = 1
13
+ gpus_for_training = 1
14
+ gpus_for_ref = 1
15
+
16
+ [training]
17
+ num_epochs = 5
18
+ iterations_per_epoch = 1
19
+ batch_size = 4
20
+ group_size = 16
21
+ learning_rate = 5e-6
22
+ max_grad_norm = 0.5
23
+ log_interval = 1
24
+ update_reference_interval = 0
25
+ weight_sync_interval = 1
26
+
27
+ [evaluation]
28
+ seeds = [0, 1, 2, 3]
29
+ rollouts_per_seed = 1
30
+ instances = 1
31
+ max_concurrent_rollouts = 4
32
+ thinking_mode = "none"
33
+ every_n_iters = 2
34
+
35
+ [rollout]
36
+ env_name = "math"
37
+ policy_name = "math-react"
38
+ max_steps_per_episode = 1
39
+ sampling_temperature = 0.3
40
+ sampling_top_p = 0.95
41
+ max_tokens = 256
42
+ max_concurrent_rollouts = 8
43
+ ops_per_rollout = 2
44
+ on_done = "reset"
@@ -0,0 +1,60 @@
1
+ from __future__ import annotations
2
+
3
+ import os
4
+ import subprocess
5
+ from typing import Optional
6
+
7
+
8
+ def _parse_public_url_from_log(log_path: str) -> Optional[str]:
9
+ try:
10
+ with open(log_path) as fh:
11
+ for line in fh:
12
+ if "modal.run" in line:
13
+ return line.strip().split()[-1].rstrip("/")
14
+ except Exception:
15
+ return None
16
+ return None
17
+
18
+
19
def deploy(script_path: Optional[str] = None, *, env_api_key: Optional[str] = None) -> str:
    """Deploy the Math Task App to Modal and return the public URL.

    - If ``script_path`` is provided, run it with bash (with
      ``ENVIRONMENT_API_KEY`` set from ``env_api_key`` when given) and parse
      the ``.last_deploy*.log`` files next to it for the URL.
    - Otherwise, try to call ``deploy()`` in ``examples.rl.task_app``.

    Raises:
        FileNotFoundError: ``script_path`` does not point to a file.
        subprocess.CalledProcessError: the deploy script exits non-zero.
        RuntimeError: no URL could be extracted from the logs, or the
            Python-based deployment failed (the original error is chained
            as ``__cause__``).
    """
    envp = os.environ.copy()
    if env_api_key:
        envp["ENVIRONMENT_API_KEY"] = env_api_key

    # Path-based deployment (preferred when a canonical script is supplied)
    if script_path:
        script_path = os.path.abspath(script_path)
        if not os.path.isfile(script_path):
            raise FileNotFoundError(f"Deploy script not found: {script_path}")
        script_dir = os.path.dirname(script_path)
        subprocess.check_call(["bash", script_path], cwd=script_dir, env=envp)
        # Try common log names in the same directory
        for name in (".last_deploy.log", ".last_deploy.dev.log", ".last_deploy.manual.log"):
            url = _parse_public_url_from_log(os.path.join(script_dir, name))
            if url:
                return url
        raise RuntimeError("Deployed, but failed to extract Modal public URL from deploy logs.")

    # Python-based deployment via examples.rl.task_app (if available).
    # Every failure in this branch, including the RuntimeErrors raised
    # below, is reported through the single wrapping RuntimeError.
    try:
        import importlib

        mod = importlib.import_module("examples.rl.task_app")
        if hasattr(mod, "deploy"):
            url = mod.deploy(env_api_key=env_api_key)
            if not url:
                raise RuntimeError("examples.rl.task_app.deploy() returned empty URL")
            return str(url).rstrip("/")
        raise RuntimeError("examples.rl.task_app.deploy() not found")
    except Exception as e:
        raise RuntimeError(
            f"No deploy script provided and Python-based deploy failed: {e}. "
            "Pass --script /path/to/deploy_task_app.sh to demo.deploy."
        ) from e
59
+
60
+
@@ -0,0 +1,22 @@
1
#!/usr/bin/env bash
# Deploy the Math Task App by delegating to examples/rl/deploy_task_app.sh
# (run through `uv run`). Output is mirrored into .last_deploy.log next to
# this script so callers can read the public URL from it.
set -euo pipefail

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
log_file="${script_dir}/.last_deploy.log"
# Start from a fresh log so a stale URL from an earlier run is never picked up.
rm -f "$log_file"

# Default to empty so `set -u` does not abort when the key is not exported.
: "${ENVIRONMENT_API_KEY:=}"

repo_root="$(cd "${script_dir}/../../../.." && pwd)"
fallback_script="${repo_root}/examples/rl/deploy_task_app.sh"

if [[ -f "${fallback_script}" ]]; then
  echo "Using ${fallback_script} via 'uv run'" | tee -a "$log_file"
  (cd "${repo_root}/examples/rl" && ENVIRONMENT_API_KEY="${ENVIRONMENT_API_KEY}" uv run bash "${fallback_script}" | tee -a "$log_file")
else
  echo "ERROR: Deploy script not found at ${fallback_script}. Pass --script /path/to/deploy_task_app.sh" | tee -a "$log_file"
  exit 1
fi

# Printed to the terminal only: this hint mentions "modal.run", and writing it
# into the log would give URL scanners a line to match that holds no URL.
echo "Deploy finished. Inspect $log_file for the public URL (…modal.run)."
22
+
@@ -0,0 +1,33 @@
1
+ """Multi-armed bandit example environment."""
2
+
3
+ from .engine import (
4
+ BanditEngine,
5
+ BanditEngineSnapshot,
6
+ BanditPrivateState,
7
+ BanditPublicState,
8
+ SynthBanditCheckpointObservationCallable,
9
+ SynthBanditObservationCallable,
10
+ )
11
+ from .environment import BanditActionInput, BanditEnvironment, BanditInteractTool
12
+ from .taskset import (
13
+ BanditTaskInstance,
14
+ BanditTaskInstanceMetadata,
15
+ create_bandit_taskset,
16
+ taskset,
17
+ )
18
+
19
+ __all__ = [
20
+ "BanditEngine",
21
+ "BanditPublicState",
22
+ "BanditPrivateState",
23
+ "BanditEngineSnapshot",
24
+ "SynthBanditObservationCallable",
25
+ "SynthBanditCheckpointObservationCallable",
26
+ "BanditEnvironment",
27
+ "BanditInteractTool",
28
+ "BanditActionInput",
29
+ "BanditTaskInstance",
30
+ "BanditTaskInstanceMetadata",
31
+ "create_bandit_taskset",
32
+ "taskset",
33
+ ]