synth-ai 0.2.4.dev8__py3-none-any.whl → 0.2.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/__init__.py +1 -1
- synth_ai/cli/__init__.py +6 -0
- synth_ai/cli/demo.py +68 -9
- synth_ai/cli/rl_demo.py +137 -0
- synth_ai/cli/root.py +65 -0
- synth_ai/demos/core/__init__.py +1 -0
- synth_ai/demos/core/cli.py +685 -0
- synth_ai/demos/demo_task_apps/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/core.py +374 -0
- synth_ai/demos/demo_task_apps/math/__init__.py +1 -0
- synth_ai/demos/demo_task_apps/math/app.py +37 -0
- synth_ai/demos/demo_task_apps/math/config.toml +44 -0
- synth_ai/demos/demo_task_apps/math/deploy_modal.py +60 -0
- synth_ai/demos/demo_task_apps/math/deploy_task_app.sh +22 -0
- synth_ai/environments/examples/bandit/__init__.py +33 -0
- synth_ai/environments/examples/bandit/engine.py +294 -0
- synth_ai/environments/examples/bandit/environment.py +194 -0
- synth_ai/environments/examples/bandit/taskset.py +200 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/analyze_semantic_words_markdown.py +250 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_comprehensive_evaluation.py +59 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_config.toml +24 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_evaluation_framework.py +1194 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/crafter_synth_config.toml +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_config_modal.toml +32 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/kick_off_ft_modal.py +384 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_action_results.py +53 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_agent_actions.py +178 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_latest_run.py +222 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_lm_traces.py +183 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_no_rewards.py +210 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/analyze_trace_issue.py +206 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_db_schema.py +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/check_latest_results.py +64 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/debug_agent_responses.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/old/quick_trace_check.py +77 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/compare_experiments.py +324 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/filter_traces_sft_turso.py +580 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/kick_off_ft_oai.py +362 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/multi_model_config.toml +49 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_enhanced_hooks.py +332 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_events.py +97 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/analyze_hook_results.py +217 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_hook_storage.py +87 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/check_seeds.py +88 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/compare_seed_performance.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/custom_eval_pipelines.py +400 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/plot_hook_frequency.py +195 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/old/seed_analysis_summary.py +56 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_openai_ft/run_rollouts_for_models_and_compare_v3.py +858 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_quick_evaluation.py +52 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_react_agent.py +874 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/example_v3_usage.py +216 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/compare_traces.py +296 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_comprehensive_evaluation.py +58 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_env_serialization.py +464 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_evaluation_browser.py +152 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_quick_evaluation.py +51 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/crafter_trace_evaluation.py +1412 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/debug_player_loss.py +112 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_service.py +203 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/diagnose_slowness.py +305 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_by_difficulty.py +126 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/eval_example.py +94 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/explore_saved_states.py +142 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft.py +26 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/filter_traces_sft_OLD.py +984 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_gemini.py +724 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_data_modal.py +386 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/generate_ft_metadata.py +205 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_gemini.py +150 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/kick_off_ft_modal.py +283 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/prepare_vertex_ft.py +280 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/profile_env_slowness.py +456 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/replicate_issue.py +166 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_and_eval.py +102 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_comparison.py +128 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/run_qwen_rollouts.py +655 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/trace_eval_OLD.py +202 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/old/validate_openai_format.py +166 -0
- synth_ai/environments/examples/crafter_classic/environment.py +41 -2
- synth_ai/environments/examples/crafter_custom/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/crafter_custom/agent_demos/trace_eval.py +202 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_issue.py +159 -0
- synth_ai/environments/examples/crafter_custom/old/analyze_diamond_spawning.py +158 -0
- synth_ai/environments/examples/crafter_custom/old/compare_worlds.py +71 -0
- synth_ai/environments/examples/crafter_custom/old/dataset_stats.py +105 -0
- synth_ai/environments/examples/crafter_custom/old/diamond_spawning_summary.py +119 -0
- synth_ai/environments/examples/crafter_custom/old/example_dataset_usage.py +52 -0
- synth_ai/environments/examples/enron/units/keyword_stats.py +112 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_evaluation_framework.py +1188 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_quick_evaluation.py +48 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_react_agent.py +562 -0
- synth_ai/environments/examples/minigrid/agent_demos/minigrid_trace_evaluation.py +221 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_evaluation_framework.py +981 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_quick_evaluation.py +74 -0
- synth_ai/environments/examples/nethack/agent_demos/nethack_react_agent.py +831 -0
- synth_ai/environments/examples/red/agent_demos/__init__.py +1 -0
- synth_ai/environments/examples/red/units/__init__.py +1 -0
- synth_ai/environments/examples/sokoban/agent_demos/sokoban_full_eval.py +899 -0
- synth_ai/environments/examples/sokoban/units/astar_common.py +95 -0
- synth_ai/environments/service/app.py +8 -0
- synth_ai/install_sqld.sh +40 -0
- synth_ai-0.2.5.dist-info/METADATA +106 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/RECORD +111 -12
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/entry_points.txt +1 -0
- synth_ai-0.2.4.dev8.dist-info/METADATA +0 -635
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.4.dev8.dist-info → synth_ai-0.2.5.dist-info}/top_level.txt +0 -0
|
@@ -0,0 +1,374 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import json
|
|
4
|
+
import os
|
|
5
|
+
import subprocess
|
|
6
|
+
import sys
|
|
7
|
+
import time
|
|
8
|
+
from dataclasses import dataclass
|
|
9
|
+
from typing import Any, Dict, Optional, Tuple
|
|
10
|
+
|
|
11
|
+
import urllib.request
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
@dataclass
class DemoEnv:
    """Settings bundle produced by ``load_env`` for the demo CLI.

    Every field defaults to an empty string until ``load_env`` fills it in.
    """

    # Backend API base URL; load_env stores it without a trailing slash.
    dev_backend_url: str = ""
    # Synth API key resolved by load_env (SYNTH_API_KEY or a DEV_/PROD_ fallback).
    synth_api_key: str = ""
    # Value resolved for ENVIRONMENT_API_KEY.
    env_api_key: str = ""
    # Value resolved for TASK_APP_BASE_URL; stored without a trailing slash.
    task_app_base_url: str = ""
    # TASK_APP_NAME as read from the persisted state file.
    task_app_name: str = ""
    # TASK_APP_SECRET_NAME as read from the persisted state file.
    task_app_secret_name: str = ""
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _mask(value: str, keep: int = 4) -> str:
|
|
25
|
+
if not value:
|
|
26
|
+
return ""
|
|
27
|
+
return value[:keep] + "…" if len(value) > keep else value
|
|
28
|
+
|
|
29
|
+
|
|
30
|
+
def _state_path() -> str:
|
|
31
|
+
return os.path.expanduser("~/.synth-ai/demo.json")
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def _read_state() -> Dict[str, Any]:
    """Load the persisted demo state.

    Returns:
        The JSON object stored at ``_state_path()``, or an empty dict when the
        file is absent, cannot be read or parsed, or does not hold an object.
    """
    try:
        state_file = _state_path()
        if not os.path.isfile(state_file):
            return {}
        with open(state_file) as handle:
            loaded = json.load(handle) or {}
    except Exception:
        # Best-effort read: any failure is treated as "no saved state".
        return {}
    if isinstance(loaded, dict):
        return loaded
    return {}
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
def _write_state(data: Dict[str, Any]) -> None:
    """Persist the demo state as JSON, readable by the owning user only.

    The state holds API keys (see ``persist_api_key``), so the file is created
    with mode ``0o600`` instead of the process default.

    Args:
        data: JSON-serialisable mapping to store at ``_state_path()``.

    Failures are swallowed on purpose: persisting state is best-effort and
    must not break CLI usage.
    """
    try:
        path = _state_path()
        os.makedirs(os.path.dirname(path), exist_ok=True)
        # os.open lets the permission bits be set at creation time.
        fd = os.open(path, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(fd, "w") as fh:
            json.dump(data, fh)
        # Also tighten a file that already existed with looser permissions.
        os.chmod(path, 0o600)
    except Exception:
        pass
|
|
54
|
+
|
|
55
|
+
|
|
56
|
+
def load_dotenv_file(path: str) -> Dict[str, str]:
    """Parse a ``.env``-style file into a dict.

    Blank lines, lines starting with ``#`` and lines without ``=`` are skipped.
    Keys and values are whitespace-trimmed, quote characters are stripped from
    both ends of the value, and a leading ``export`` keyword on the key is
    dropped so shell-sourceable files yield the bare variable name.

    Args:
        path: File to read (decoded as UTF-8).

    Returns:
        Mapping of variable names to values; empty when the file is missing
        or cannot be read.
    """
    out: Dict[str, str] = {}
    try:
        with open(path, encoding="utf-8") as fh:
            for raw in fh:
                line = raw.strip()
                if not line or line.startswith("#") or "=" not in line:
                    continue
                k, v = line.split("=", 1)
                key_parts = k.split()
                if len(key_parts) == 2 and key_parts[0] == "export":
                    key = key_parts[1]
                else:
                    key = k.strip()
                out[key] = v.strip().strip('"').strip("'")
    except (OSError, ValueError):
        # Best-effort: an unreadable or undecodable file (UnicodeDecodeError
        # is a ValueError) yields whatever was parsed so far.
        pass
    return out
|
|
69
|
+
|
|
70
|
+
|
|
71
|
+
def _persist_dotenv_values(path: str, values: Dict[str, str]) -> None:
|
|
72
|
+
"""Ensure ``values`` are present in ``path`` (.env style)."""
|
|
73
|
+
|
|
74
|
+
try:
|
|
75
|
+
existing_lines: list[str] = []
|
|
76
|
+
if os.path.isfile(path):
|
|
77
|
+
with open(path) as fh:
|
|
78
|
+
existing_lines = fh.read().splitlines()
|
|
79
|
+
else:
|
|
80
|
+
os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
|
|
81
|
+
mapping: Dict[str, str] = {}
|
|
82
|
+
order: list[str] = []
|
|
83
|
+
for line in existing_lines:
|
|
84
|
+
if not line or line.startswith("#") or "=" not in line:
|
|
85
|
+
order.append(line)
|
|
86
|
+
continue
|
|
87
|
+
key, val = line.split("=", 1)
|
|
88
|
+
key = key.strip()
|
|
89
|
+
mapping[key] = val
|
|
90
|
+
order.append(key)
|
|
91
|
+
for key, value in values.items():
|
|
92
|
+
if key not in mapping:
|
|
93
|
+
order.append(key)
|
|
94
|
+
mapping[key] = value
|
|
95
|
+
with open(path, "w") as fh:
|
|
96
|
+
for item in order:
|
|
97
|
+
if item in mapping:
|
|
98
|
+
fh.write(f"{item}={mapping[item]}\n")
|
|
99
|
+
else:
|
|
100
|
+
fh.write(item + "\n")
|
|
101
|
+
for key, value in values.items():
|
|
102
|
+
if key not in order:
|
|
103
|
+
fh.write(f"{key}={value}\n")
|
|
104
|
+
except Exception:
|
|
105
|
+
# Best-effort; failure to persist shouldn't crash CLI usage.
|
|
106
|
+
pass
|
|
107
|
+
|
|
108
|
+
|
|
109
|
+
def persist_dotenv_values(values: Dict[str, str], *, cwd: str | None = None) -> str:
    """Write ``values`` into the ``.env`` file of ``cwd``.

    Args:
        values: Variables to persist.
        cwd: Directory holding the ``.env``; the current working directory
            is used when omitted or empty.

    Returns:
        Path of the ``.env`` file that was targeted.
    """
    base_dir = cwd if cwd else os.getcwd()
    dotenv_path = os.path.join(base_dir, ".env")
    _persist_dotenv_values(dotenv_path, values)
    return dotenv_path
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def persist_env_api_key(key: str) -> None:
    """Store ``key`` under ``ENVIRONMENT_API_KEY`` in the demo state file."""
    state = _read_state()
    state.update({"ENVIRONMENT_API_KEY": key})
    _write_state(state)
|
|
119
|
+
|
|
120
|
+
|
|
121
|
+
def modal_auth_status() -> Tuple[bool, str]:
    """Report whether Modal credentials can be found.

    ``MODAL_TOKEN_ID`` / ``MODAL_TOKEN_SECRET`` from the environment take
    precedence; otherwise the values come from ``modal.config.config``.

    Returns:
        ``(True, description)`` when both a token id and a secret are available
        (the description names the source and a masked token id), otherwise
        ``(False, reason)`` explaining what is missing or that the ``modal``
        package could not be imported.
    """
    id_from_env = (os.environ.get("MODAL_TOKEN_ID") or "").strip()
    secret_from_env = (os.environ.get("MODAL_TOKEN_SECRET") or "").strip()

    try:
        from modal.config import config as modal_config, user_config_path
    except Exception as exc:  # pragma: no cover - modal optional in some envs
        return False, f"Modal client unavailable ({exc})"

    token_id = id_from_env or str(modal_config.get("token_id") or "")
    token_secret = secret_from_env or str(modal_config.get("token_secret") or "")

    if token_id and token_secret:
        if id_from_env:
            source = "environment variables"
        else:
            profile = os.environ.get("MODAL_PROFILE") or "default"
            source = f"profile {profile}"
        return True, f"{source} ({_mask(token_id, keep=6)})"

    # A token id in the environment without its secret gets a dedicated message.
    if id_from_env and not secret_from_env:
        return False, (
            "MODAL_TOKEN_ID is set but MODAL_TOKEN_SECRET is missing. Set both env vars "
            "or regenerate credentials via `modal token new`."
        )

    missing = [
        label
        for label, present in (("token_id", token_id), ("token_secret", token_secret))
        if not present
    ]

    hint = "Run `modal setup` or `modal token new` to authenticate."
    # Mention the config file only when it actually exists on disk.
    if user_config_path and os.path.exists(user_config_path):
        hint += f" (config: {user_config_path})"

    missing_str = ", ".join(missing) or "credentials"
    return False, f"Missing Modal {missing_str}. {hint}"
|
|
164
|
+
|
|
165
|
+
|
|
166
|
+
def _lookup_setting(names: Tuple[str, ...], sources: Tuple[Dict[str, Any], ...]) -> str:
    """Return the first non-empty value, trying every source for each name in order."""
    for name in names:
        for source in sources:
            value = source.get(name)
            if value:
                return str(value)
    return ""


def _normalize_backend_url(url: str) -> str:
    """Drop trailing slashes and make sure ``url`` ends with a single ``/api``."""
    trimmed = url.rstrip("/")
    return trimmed if trimmed.endswith("/api") else trimmed + "/api"


def load_env() -> DemoEnv:
    """Resolve environment with sane defaults and auto-detection.

    Backend URL:
      - Use BACKEND_OVERRIDE (any) from CWD .env if set
      - Else use DEV_BACKEND_URL from CWD .env ONLY if it's localhost/127.0.0.1 or :8000
      - Else default to prod https://agent-learning.onrender.com/api
      - The result always ends in ``/api``; a trailing slash on the configured
        value is ignored, so ``.../api/`` is not turned into ``.../api/api``

    API keys:
      - SYNTH_API_KEY from OS -> CWD .env -> repo .env -> pkg demo .env -> state
      - If still missing, auto-pick DEV/PROD key based on backend and persist
      - ENVIRONMENT_API_KEY additionally consults examples/rl/.env before state

    TASK_APP_BASE_URL:
      - OS -> CWD .env -> repo .env -> pkg demo .env -> state

    Side effects: prints the resolved (masked) settings, and writes the state
    file when a fallback API key was auto-picked.

    Returns:
        A populated ``DemoEnv``.
    """
    env = DemoEnv()

    os_env: Dict[str, str] = dict(os.environ)

    # CWD .env
    cwd_env = load_dotenv_file(os.path.join(os.getcwd(), ".env"))

    # Repo/package .envs (fallbacks)
    repo_root = os.path.abspath(os.path.join(os.path.dirname(__file__), "../../.."))
    repo_env = load_dotenv_file(os.path.join(repo_root, ".env"))
    pkg_env = load_dotenv_file(os.path.join(repo_root, "synth_ai", "demos", "demo_task_apps", "math", ".env"))
    examples_env = load_dotenv_file(os.path.join(repo_root, "examples", "rl", ".env"))

    state = _read_state()

    # Lookup order shared by every setting; state/examples are appended per key.
    env_sources = (os_env, cwd_env, repo_env, pkg_env)

    # Backend URL resolution
    prod_url = "https://agent-learning.onrender.com/api"
    backend_override = (cwd_env.get("BACKEND_OVERRIDE") or "").strip()
    dev_env = (cwd_env.get("DEV_BACKEND_URL") or "").strip()
    if backend_override:
        dev_url = backend_override
    elif dev_env:
        lower = dev_env.lower()
        looks_local = "localhost" in lower or "127.0.0.1" in lower or lower.endswith(":8000")
        dev_url = dev_env if looks_local else prod_url
    else:
        dev_url = prod_url
    dev_url = _normalize_backend_url(dev_url)

    # API key selection
    synth_api_key = _lookup_setting(("SYNTH_API_KEY",), env_sources + (state,))
    if not synth_api_key:
        # No generic key: fall back to the key matching the chosen backend.
        if "agent-learning.onrender.com" in dev_url:
            fallback_names: Tuple[str, ...] = ("PROD_SYNTH_API_KEY",)
        else:
            fallback_names = ("DEV_SYNTH_API_KEY", "TESTING_LOCAL_SYNTH_API_KEY")
        synth_api_key = _lookup_setting(fallback_names, env_sources)
        if synth_api_key:
            # Remember the auto-picked key for later runs.
            st = dict(state)
            st["SYNTH_API_KEY"] = synth_api_key
            _write_state(st)

    env_api_key = _lookup_setting(("ENVIRONMENT_API_KEY",), env_sources + (examples_env, state))

    # Task app URL
    task_url = _lookup_setting(("TASK_APP_BASE_URL",), env_sources + (state,))

    task_app_name = str(state.get("TASK_APP_NAME") or "")
    task_app_secret_name = str(state.get("TASK_APP_SECRET_NAME") or "")

    env.dev_backend_url = dev_url.rstrip("/")
    env.synth_api_key = synth_api_key
    env.env_api_key = env_api_key
    env.task_app_base_url = task_url.rstrip("/")
    env.task_app_name = task_app_name
    env.task_app_secret_name = task_app_secret_name

    print("ENV:")
    print(f" DEV_BACKEND_URL={env.dev_backend_url}")
    print(f" SYNTH_API_KEY={_mask(env.synth_api_key)}")
    print(f" ENVIRONMENT_API_KEY={_mask(env.env_api_key)}")
    print(f" TASK_APP_BASE_URL={env.task_app_base_url}")
    if task_app_name:
        print(f" TASK_APP_NAME={task_app_name}")
    if task_app_secret_name:
        print(f" TASK_APP_SECRET_NAME={task_app_secret_name}")
    return env
|
|
289
|
+
|
|
290
|
+
|
|
291
|
+
def assert_http_ok(url: str, method: str = "GET", allow_redirects: bool = True, timeout: float = 10.0) -> bool:
    """Return True when ``url`` answers with a 2xx/3xx status.

    Args:
        url: Address to probe.
        method: HTTP method for the request.
        allow_redirects: When True (default) redirects are followed and the
            final response is judged. When False the first response is judged,
            so a 3xx answer counts as OK without being followed.
        timeout: Socket timeout in seconds.

    Returns:
        False for any other status or for any error (bad URL, connection
        failure, timeout, ...); this function never raises.
    """
    import urllib.error

    class _NoRedirect(urllib.request.HTTPRedirectHandler):
        """Redirect handler that declines every redirect."""

        def redirect_request(self, req, fp, code, msg, headers, newurl):
            # Returning None makes urllib surface the 3xx as an HTTPError.
            return None

    try:
        req = urllib.request.Request(url, method=method)
        if allow_redirects:
            response = urllib.request.urlopen(req, timeout=timeout)  # nosec - controlled URL
        else:
            response = urllib.request.build_opener(_NoRedirect).open(req, timeout=timeout)  # nosec - controlled URL
        with response as resp:
            code = getattr(resp, "status", 200)
        return 200 <= int(code) < 400
    except urllib.error.HTTPError as exc:
        status = exc.code
        exc.close()
        # A 3xx only counts as success when the caller asked not to follow it.
        return (not allow_redirects) and 300 <= int(status) < 400
    except Exception:
        return False
|
|
299
|
+
|
|
300
|
+
|
|
301
|
+
def deploy_modal_math(env: DemoEnv) -> str:
    """Deploy Math Task App to Modal using in-repo deploy script; return public URL.

    The script next to this module (``math/deploy_task_app.sh``) is preferred;
    ``examples/rl/deploy_task_app.sh`` under the presumed repo root is the
    fallback. After the script succeeds, the ``.last_deploy*.log`` files in the
    script's directory are scanned for a ``*.modal.run`` address.

    Args:
        env: Resolved demo settings; a non-empty ``env_api_key`` is exported to
            the script as ``ENVIRONMENT_API_KEY``.

    Returns:
        The first Modal URL found in the logs, without a trailing slash.

    Raises:
        RuntimeError: If no deploy script exists or no URL could be extracted.
        subprocess.CalledProcessError: If the deploy script exits non-zero.
    """
    import re

    # Prefer the script colocated under demo_task_apps/math relative to this file
    this_dir = os.path.dirname(__file__)
    demo_script = os.path.join(this_dir, "math", "deploy_task_app.sh")
    # Fallback to top-level examples path if needed (repo root heuristic)
    repo_root = os.path.abspath(os.path.join(this_dir, "../../.."))
    fallback_script = os.path.join(repo_root, "examples", "rl", "deploy_task_app.sh")
    script = demo_script if os.path.isfile(demo_script) else fallback_script
    if not os.path.isfile(script):
        raise RuntimeError(f"deploy_task_app.sh not found at {demo_script} or {fallback_script}")

    envp = os.environ.copy()
    if env.env_api_key:
        envp["ENVIRONMENT_API_KEY"] = env.env_api_key
    script_dir = os.path.dirname(script)
    print(f"Deploying Math Task App to Modal using: {script}")
    subprocess.check_call(["bash", script], cwd=script_dir, env=envp)

    # Match an actual host name ending in .modal.run rather than any line that
    # merely mentions "modal.run" (the deploy script's closing hint does).
    url_pattern = re.compile(r"(?:https?://)?[\w-][\w.-]*\.modal\.run(?:/[^\s\"'<>)]*)?")

    # Read last deploy log for URL
    for candidate in (".last_deploy.log", ".last_deploy.dev.log", ".last_deploy.manual.log"):
        p = os.path.join(script_dir, candidate)
        try:
            # The log may contain non-ASCII text; never fail on decoding.
            with open(p, encoding="utf-8", errors="replace") as fh:
                for line in fh:
                    match = url_pattern.search(line)
                    if match:
                        return match.group(0).rstrip("/")
        except (OSError, ValueError):
            continue
    raise RuntimeError("Failed to extract Modal Task App URL from deploy logs")
|
|
330
|
+
|
|
331
|
+
|
|
332
|
+
def persist_task_url(url: str, *, name: str | None = None) -> None:
    """Record the task app URL (and optionally its name) in the demo state.

    Args:
        url: Value stored under ``TASK_APP_BASE_URL``.
        name: When given, stored under ``TASK_APP_NAME``; ``"<name>-secret"``
            is stored under ``TASK_APP_SECRET_NAME``.

    Prints which keys changed and, when the name or secret name changed, the
    resulting ``TASK_APP_SECRET_NAME``.
    """
    state = _read_state()
    updated: list[str] = []

    if state.get("TASK_APP_BASE_URL") != url:
        state["TASK_APP_BASE_URL"] = url
        updated.append("TASK_APP_BASE_URL")

    if name:
        name_changed = state.get("TASK_APP_NAME") != name
        if name_changed:
            state["TASK_APP_NAME"] = name
            updated.append("TASK_APP_NAME")
        derived_secret = f"{name}-secret"
        if state.get("TASK_APP_SECRET_NAME") != derived_secret:
            state["TASK_APP_SECRET_NAME"] = derived_secret
            # The secret name is only listed when the app name did not change.
            if not name_changed:
                updated.append("TASK_APP_SECRET_NAME")

    _write_state(state)

    if not updated:
        return
    print(f"Saved {', '.join(updated)} to {_state_path()}")
    if "TASK_APP_NAME" in updated or "TASK_APP_SECRET_NAME" in updated:
        print(f"TASK_APP_SECRET_NAME={state.get('TASK_APP_SECRET_NAME', '')}")
|
|
352
|
+
|
|
353
|
+
|
|
354
|
+
def persist_api_key(key: str) -> None:
    """Store ``key`` under ``SYNTH_API_KEY`` in the demo state file."""
    state = _read_state()
    state.update({"SYNTH_API_KEY": key})
    _write_state(state)
|
|
358
|
+
|
|
359
|
+
|
|
360
|
+
def run_job(env: DemoEnv, config_toml_path: str, *, batch_size: Optional[int] = None, group_size: Optional[int] = None, model: Optional[str] = None) -> None:
    """Print the cURL command that would create an RL job (placeholder).

    Nothing is sent to the backend: the function only prints the start of a
    ``curl`` invocation against ``<backend>/api/rl/jobs`` followed by a note.

    Args:
        env: Supplies ``dev_backend_url`` and ``synth_api_key``. The key is
            printed in clear text as part of the Authorization header.
        config_toml_path: Currently unused by this scaffold.
        batch_size: Currently unused by this scaffold.
        group_size: Currently unused by this scaffold.
        model: Currently unused by this scaffold.
    """
    backend = env.dev_backend_url.rstrip("/")
    api_base = backend if backend.endswith("/api") else backend + "/api"

    # The JSON body is intentionally left open-ended in this scaffold.
    command_parts = [
        "curl -s -X POST \"" + api_base + "/rl/jobs\" ",
        "-H 'Content-Type: application/json' ",
        f"-H 'Authorization: Bearer {env.synth_api_key}' ",
        "-d '{",
    ]

    print("\nTo create an RL job, run:")
    print("".join(command_parts))
    print(" NOTE: CLI implementation will build the full JSON body with inline TOML config and stream events.")
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
# Package namespace for Math demo task app
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
from fastapi import FastAPI
|
|
5
|
+
from starlette.middleware.cors import CORSMiddleware
|
|
6
|
+
|
|
7
|
+
# Reuse the examples/rl task_app routes if available
|
|
8
|
+
try:
|
|
9
|
+
from synth_ai.examples.rl.task_app import make_app as make_rl_app # type: ignore
|
|
10
|
+
except Exception: # fallback path when imported from repo root
|
|
11
|
+
try:
|
|
12
|
+
from examples.rl.task_app import make_app as make_rl_app # type: ignore
|
|
13
|
+
except Exception as e: # pragma: no cover
|
|
14
|
+
raise ImportError(f"Unable to import RL task app: {e}")
|
|
15
|
+
|
|
16
|
+
|
|
17
|
+
def create_app() -> FastAPI:
    """Build the Math demo app on top of the RL task app.

    Sets ``DEMO_ENV_NAME`` / ``DEMO_POLICY_NAME`` defaults in the process
    environment (existing values are kept), creates the base app via
    ``make_rl_app()`` and attaches CORS middleware.

    Returns:
        The app returned by ``make_rl_app()`` with CORS middleware added.
    """
    # Math defaults via env; presumably read by the RL task_app helpers.
    for variable, default in (("DEMO_ENV_NAME", "math"), ("DEMO_POLICY_NAME", "math-react")):
        os.environ.setdefault(variable, default)

    application = make_rl_app()

    # CORS for local demo.
    # NOTE(review): wildcard origins together with credentials is very
    # permissive; confirm this app is never exposed beyond a demo setting.
    cors_options = {
        "allow_origins": ["*"],
        "allow_credentials": True,
        "allow_methods": ["*"],
        "allow_headers": ["*"],
    }
    application.add_middleware(CORSMiddleware, **cors_options)
    return application
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def run(host: str = "127.0.0.1", port: int = 8080):
    """Serve the demo app with uvicorn.

    Args:
        host: Interface to bind.
        port: Port to listen on; the ``PORT`` environment variable, when set,
            takes precedence.
    """
    import uvicorn

    application = create_app()
    listen_port = int(os.getenv("PORT", port))
    uvicorn.run(application, host=host, port=listen_port)
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
[model]
|
|
2
|
+
name = "Qwen/Qwen3-0.6B"
|
|
3
|
+
dtype = "bfloat16"
|
|
4
|
+
seed = 42
|
|
5
|
+
|
|
6
|
+
[reference]
|
|
7
|
+
placement = "dedicated"
|
|
8
|
+
port = 8002
|
|
9
|
+
|
|
10
|
+
[topology]
|
|
11
|
+
type = "single_node_split"
|
|
12
|
+
gpus_for_vllm = 1
|
|
13
|
+
gpus_for_training = 1
|
|
14
|
+
gpus_for_ref = 1
|
|
15
|
+
|
|
16
|
+
[training]
|
|
17
|
+
num_epochs = 5
|
|
18
|
+
iterations_per_epoch = 1
|
|
19
|
+
batch_size = 4
|
|
20
|
+
group_size = 16
|
|
21
|
+
learning_rate = 5e-6
|
|
22
|
+
max_grad_norm = 0.5
|
|
23
|
+
log_interval = 1
|
|
24
|
+
update_reference_interval = 0
|
|
25
|
+
weight_sync_interval = 1
|
|
26
|
+
|
|
27
|
+
[evaluation]
|
|
28
|
+
seeds = [0, 1, 2, 3]
|
|
29
|
+
rollouts_per_seed = 1
|
|
30
|
+
instances = 1
|
|
31
|
+
max_concurrent_rollouts = 4
|
|
32
|
+
thinking_mode = "none"
|
|
33
|
+
every_n_iters = 2
|
|
34
|
+
|
|
35
|
+
[rollout]
|
|
36
|
+
env_name = "math"
|
|
37
|
+
policy_name = "math-react"
|
|
38
|
+
max_steps_per_episode = 1
|
|
39
|
+
sampling_temperature = 0.3
|
|
40
|
+
sampling_top_p = 0.95
|
|
41
|
+
max_tokens = 256
|
|
42
|
+
max_concurrent_rollouts = 8
|
|
43
|
+
ops_per_rollout = 2
|
|
44
|
+
on_done = "reset"
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
import os
|
|
4
|
+
import subprocess
|
|
5
|
+
from typing import Optional
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
def _parse_public_url_from_log(log_path: str) -> Optional[str]:
|
|
9
|
+
try:
|
|
10
|
+
with open(log_path) as fh:
|
|
11
|
+
for line in fh:
|
|
12
|
+
if "modal.run" in line:
|
|
13
|
+
return line.strip().split()[-1].rstrip("/")
|
|
14
|
+
except Exception:
|
|
15
|
+
return None
|
|
16
|
+
return None
|
|
17
|
+
|
|
18
|
+
|
|
19
|
+
def deploy(script_path: Optional[str] = None, *, env_api_key: Optional[str] = None) -> str:
    """
    Deploy the Math Task App to Modal and return the public URL.

    - If script_path is provided, run it (bash) and parse .last_deploy*.log for URL.
    - Otherwise, try to call a built-in deploy() in examples.rl.task_app if available.

    Args:
        script_path: Deploy script to run with bash from its own directory.
        env_api_key: When set, exported to the script as ENVIRONMENT_API_KEY
            (or passed to the Python deploy function).

    Returns:
        The public URL of the deployed app.

    Raises:
        FileNotFoundError: If script_path does not point to a file.
        subprocess.CalledProcessError: If the deploy script exits non-zero.
        RuntimeError: If no URL could be extracted from the logs, or the
            Python-based deployment failed (the original error is chained
            as ``__cause__``).
    """
    envp = os.environ.copy()
    if env_api_key:
        envp["ENVIRONMENT_API_KEY"] = env_api_key

    # Path-based deployment (preferred when a canonical script is supplied)
    if script_path:
        script_path = os.path.abspath(script_path)
        if not os.path.isfile(script_path):
            raise FileNotFoundError(f"Deploy script not found: {script_path}")
        script_dir = os.path.dirname(script_path)
        subprocess.check_call(["bash", script_path], cwd=script_dir, env=envp)
        # Try common log names in the same directory
        for name in (".last_deploy.log", ".last_deploy.dev.log", ".last_deploy.manual.log"):
            url = _parse_public_url_from_log(os.path.join(script_dir, name))
            if url:
                return url
        raise RuntimeError("Deployed, but failed to extract Modal public URL from deploy logs.")

    # Python-based deployment via examples.rl.task_app (if available)
    try:
        import importlib

        mod = importlib.import_module("examples.rl.task_app")
        if hasattr(mod, "deploy"):
            url = mod.deploy(env_api_key=env_api_key)
            if not url:
                raise RuntimeError("examples.rl.task_app.deploy() returned empty URL")
            return str(url).rstrip("/")
        raise RuntimeError("examples.rl.task_app.deploy() not found")
    except Exception as e:
        # Chain explicitly so the underlying failure stays attached as the cause.
        raise RuntimeError(
            f"No deploy script provided and Python-based deploy failed: {e}. "
            "Pass --script /path/to/deploy_task_app.sh to demo.deploy."
        ) from e
|
|
59
|
+
|
|
60
|
+
|
|
@@ -0,0 +1,22 @@
|
|
|
1
|
+
#!/usr/bin/env bash
# Deploy wrapper for the Math demo task app: delegates to
# examples/rl/deploy_task_app.sh under the presumed repo root and records all
# output in .last_deploy.log next to this script.

# Abort on command failure, on use of unset variables, and on pipeline failure.
set -euo pipefail

script_dir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
log_file="${script_dir}/.last_deploy.log"
# Start every run with a fresh log.
rm -f "$log_file"

# Default ENVIRONMENT_API_KEY to empty so `set -u` does not abort when it is unset.
: "${ENVIRONMENT_API_KEY:=}"

# Four directories above this script.
repo_root="$(cd "${script_dir}/../../../.." && pwd)"
fallback_script="${repo_root}/examples/rl/deploy_task_app.sh"

if [[ -f "${fallback_script}" ]]; then
  echo "Using ${fallback_script} via 'uv run'" | tee -a "$log_file"
  # Run in a subshell so the directory change does not leak into this script.
  (cd "${repo_root}/examples/rl" && ENVIRONMENT_API_KEY="${ENVIRONMENT_API_KEY}" uv run bash "${fallback_script}" | tee -a "$log_file")
else
  echo "ERROR: Deploy script not found at ${fallback_script}. Pass --script /path/to/deploy_task_app.sh" | tee -a "$log_file"
  exit 1
fi

# NOTE: this closing message is itself appended to the log and contains the
# text "modal.run".
echo "Deploy finished. Inspect $log_file for the public URL (…modal.run)." | tee -a "$log_file"
|
|
22
|
+
|
|
@@ -0,0 +1,33 @@
|
|
|
1
|
+
"""Multi-armed bandit example environment."""
|
|
2
|
+
|
|
3
|
+
from .engine import (
|
|
4
|
+
BanditEngine,
|
|
5
|
+
BanditEngineSnapshot,
|
|
6
|
+
BanditPrivateState,
|
|
7
|
+
BanditPublicState,
|
|
8
|
+
SynthBanditCheckpointObservationCallable,
|
|
9
|
+
SynthBanditObservationCallable,
|
|
10
|
+
)
|
|
11
|
+
from .environment import BanditActionInput, BanditEnvironment, BanditInteractTool
|
|
12
|
+
from .taskset import (
|
|
13
|
+
BanditTaskInstance,
|
|
14
|
+
BanditTaskInstanceMetadata,
|
|
15
|
+
create_bandit_taskset,
|
|
16
|
+
taskset,
|
|
17
|
+
)
|
|
18
|
+
|
|
19
|
+
__all__ = [
|
|
20
|
+
"BanditEngine",
|
|
21
|
+
"BanditPublicState",
|
|
22
|
+
"BanditPrivateState",
|
|
23
|
+
"BanditEngineSnapshot",
|
|
24
|
+
"SynthBanditObservationCallable",
|
|
25
|
+
"SynthBanditCheckpointObservationCallable",
|
|
26
|
+
"BanditEnvironment",
|
|
27
|
+
"BanditInteractTool",
|
|
28
|
+
"BanditActionInput",
|
|
29
|
+
"BanditTaskInstance",
|
|
30
|
+
"BanditTaskInstanceMetadata",
|
|
31
|
+
"create_bandit_taskset",
|
|
32
|
+
"taskset",
|
|
33
|
+
]
|