synth-ai 0.2.6__py3-none-any.whl → 0.2.6.dev2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of synth-ai might be problematic.
- synth_ai/__init__.py +18 -25
- synth_ai/cli/rl_demo.py +52 -4
- synth_ai/demos/core/cli.py +443 -40
- synth_ai/demos/demo_task_apps/math/_common.py +17 -0
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +415 -0
- synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py +23 -9
- synth_ai/environments/service/app.py +13 -6
- synth_ai/experimental/synth_oss.py +2 -2
- synth_ai/http.py +26 -102
- synth_ai/http_client.py +104 -0
- synth_ai/lm/core/synth_models.py +2 -2
- synth_ai/tracing_v3/decorators.py +1 -0
- synth_ai/tracing_v3/hooks.py +1 -0
- synth_ai/tracing_v3/session_tracer.py +18 -7
- synth_ai/tracing_v3/turso/manager.py +3 -1
- synth_ai/tracing_v3/turso/models.py +3 -0
- synth_ai/tracing_v3/utils.py +1 -0
- {synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/METADATA +1 -1
- {synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/RECORD +23 -20
- {synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.6.dist-info → synth_ai-0.2.6.dev2.dist-info}/top_level.txt +0 -0
synth_ai/demos/demo_task_apps/math/modal_task_app.py
ADDED
@@ -0,0 +1,415 @@
+from __future__ import annotations
+
+"""Modal task app for Hendrycks MATH single-step RL environment."""
+
+import os
+from pathlib import Path
+
+from modal import App, Image, Secret, asgi_app
+from functools import lru_cache
+
+# Self-contained: no external problem bank installer required
+
+
+_HERE = Path(__file__).resolve()
+_ROOT = _HERE.parent
+_SYNTH_HOSTED = None
+try:
+    probe = _HERE
+    for _ in range(8):
+        candidate = (probe / "backend/app/routes/clustered_training/dev/synth_envs_hosted").resolve()
+        if candidate.exists():
+            _SYNTH_HOSTED = candidate
+            break
+        if probe.parent == probe:
+            break
+        probe = probe.parent
+except Exception:
+    _SYNTH_HOSTED = None
+
+image = Image.debian_slim(python_version="3.11").pip_install(
+    "fastapi>=0.110.0",
+    "uvicorn>=0.23.0",
+    "pydantic>=2.6.0",
+    "httpx>=0.24.0",
+    "numpy>=1.24.0",
+    "aiohttp>=3.8.0",
+    "datasets>=2.16.0",
+    "synth-ai",
+)
+if _SYNTH_HOSTED is not None:
+    image = image.add_local_dir(str(_SYNTH_HOSTED), "/app/synth_envs_hosted")
+
+# No extra local dirs required; app is self-contained
+
+app = App("hendrycks-math-task-app")
+_SECRET_NAME = os.getenv("MATH_TASK_APP_SECRET", "crafter-environment-sdk")
+
+
+@app.function(
+    image=image,
+    timeout=600,
+    memory=16384,
+    cpu=4,
+    min_containers=1,
+    secrets=[Secret.from_name(_SECRET_NAME)],
+)
+@asgi_app()
+def fastapi_app():
+    import httpx
+    from fastapi import Body, HTTPException, status
+    from fastapi import FastAPI, Request, Header
+    from fastapi.middleware.cors import CORSMiddleware
+    from fastapi.responses import JSONResponse
+
+    # Inline, self-contained FastAPI app (math-only)
+    @lru_cache(maxsize=1)
+    def _hf_split(subject: str, split: str, slice_spec: str | None = None):
+        from datasets import load_dataset  # type: ignore
+        s = split
+        if slice_spec:
+            s = f"{s}{slice_spec}"
+        return load_dataset("nlile/hendrycks-MATH-benchmark", subject, split=s)
+
+    def _normalize_answer_text(s: str) -> str:
+        import re as _re
+        return _re.sub(r"[^0-9A-Za-z.+\-/*=]", "", (s or "").strip()).lower()
+
+    def _extract_boxed(s: str) -> str:
+        import re as _re
+        m = list(_re.finditer(r"\\boxed\{([^}]+)\}", s or ""))
+        return m[-1].group(1) if m else ""
+
+    def _load_hendrycks_problem(seed: int, subject: str | None = None) -> tuple[str, str]:
+        subj = subject or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
+        ds = _hf_split(subj, os.getenv("HENDRYCKS_MATH_SPLIT", "test"), os.getenv("HENDRYCKS_MATH_SLICE"))
+        n = len(ds) if hasattr(ds, "__len__") else 0
+        if n == 0:
+            raise RuntimeError("Hendrycks MATH dataset loaded empty")
+        idx = abs(int(seed)) % n
+        ex = ds[int(idx)]
+        q = ex.get("problem") or ex.get("question") or ex.get("prompt")
+        a = ex.get("solution") or ex.get("answer") or ""
+        if not q:
+            raise RuntimeError("Hendrycks item missing problem text")
+        return str(q), str(a)
+
+    def create_app() -> FastAPI:
+        app = FastAPI(title="Hendrycks Math Task App", version="0.1.0")
+        app.add_middleware(
+            CORSMiddleware,
+            allow_origins=["*"],
+            allow_credentials=True,
+            allow_methods=["*"],
+            allow_headers=["*"],
+        )
+
+        @app.get("/info")
+        async def info():
+            return {
+                "service": {"base_url": os.getenv("SERVICE_BASE_URL", "")},
+                "inference": {"base_url": "", "endpoints": {"chat_completions": "/v1/chat/completions"}},
+            }
+
+        @app.get("/health")
+        async def health(x_api_key: str | None = Header(default=None, alias="X-API-Key")):
+            env_key = os.environ.get("ENVIRONMENT_API_KEY")
+            if not env_key:
+                return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
+            if x_api_key is not None and x_api_key != env_key:
+                return JSONResponse(status_code=401, content={"status": "unauthorized", "detail": "Invalid API key"})
+            return {"status": "healthy"}
+
+        # Optional rollout-specific health for CLI compatibility
+        @app.get("/health/rollout")
+        async def health_rollout(x_api_key: str | None = Header(default=None, alias="X-API-Key")):
+            env_key = os.environ.get("ENVIRONMENT_API_KEY")
+            if not env_key:
+                return JSONResponse(status_code=503, content={"status": "unhealthy", "detail": "Missing ENVIRONMENT_API_KEY"})
+            if not x_api_key or x_api_key != env_key:
+                return JSONResponse(status_code=401, content={"status": "unauthorized", "detail": "Invalid or missing API key"})
+            return {"ok": True}
+
+        # _load_hendrycks_problem is defined at fastapi_app scope
+
+        @app.get("/task_info")
+        async def task_info(seed: int = 0, subject: str = "algebra"):
+            """Return Hendrycks MATH problem/answer and tool schema for a seed."""
+            q, a = _load_hendrycks_problem(int(seed), subject=subject)
+            tools = [{
+                "name": "interact",
+                "description": "Submit one or more actions to the math environment.",
+                "parameters": {
+                    "type": "object",
+                    "properties": {"actions": {"type": "array", "items": {"type": "string"}}},
+                    "required": ["actions"],
+                },
+            }]
+            return {
+                "seed": int(seed),
+                "subject": subject,
+                "system": "",
+                "user": q,
+                "tools": tools,
+                "policy": {"name": "math-react"},
+                "answer": a,
+            }
+
+        return app
+
+    api = create_app()
+
+    @api.get("/")
+    async def root_probe():
+        return {"status": "ok", "service": "math"}
+
+    @api.head("/")
+    async def head_probe():
+        return {"status": "ok"}
+
+    env_key = (
+        os.environ.get("ENVIRONMENT_API_KEY")
+        or os.environ.get("DEV_ENVIRONMENT_API_KEY")
+        or os.environ.get("dev_environment_api_key")
+    )
+    if not env_key:
+        raise RuntimeError("ENVIRONMENT_API_KEY missing in task app environment")
+
+    OPENAI_REMOVE_FIELDS = ("stop_after_tool_calls", "thinking_mode", "thinking_budget", "reasoning")
+    OPENAI_REMOVE_SAMPLING_FIELDS = ("temperature", "top_p")
+    TOOL_CHOICE_FORCE = {"type": "function", "function": {"name": "interact_many"}}
+
+    def _prepare_openai_payload(model: str | None, payload: dict[str, object]) -> dict[str, object]:
+        sanitized = dict(payload)
+        for key in OPENAI_REMOVE_FIELDS:
+            sanitized.pop(key, None)
+        if model and "gpt-5" in model:
+            if "max_tokens" in sanitized and "max_completion_tokens" not in sanitized:
+                sanitized["max_completion_tokens"] = sanitized.pop("max_tokens")
+            else:
+                sanitized.pop("max_tokens", None)
+            for field in OPENAI_REMOVE_SAMPLING_FIELDS:
+                sanitized.pop(field, None)
+        sanitized["tool_choice"] = TOOL_CHOICE_FORCE
+        sanitized["parallel_tool_calls"] = False
+        return sanitized
+
+    @api.post("/proxy/v1/chat/completions")
+    def proxy_chat_completions(request: dict[str, object] = Body(...)):
+        key = os.environ.get("OPENAI_API_KEY")
+        if not key:
+            raise HTTPException(status_code=status.HTTP_503_SERVICE_UNAVAILABLE, detail="OPENAI_API_KEY missing")
+        model = request.get("model") if isinstance(request, dict) else None
+        payload = _prepare_openai_payload(model if isinstance(model, str) else None, request if isinstance(request, dict) else {})
+        headers = {"Authorization": f"Bearer {key}"}
+        with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
+            resp = client.post("https://api.openai.com/v1/chat/completions", json=payload, headers=headers)
+            try:
+                data = resp.json()
+            except Exception:
+                data = {"error": "invalid_json", "raw": resp.text[:400]}
+            if resp.status_code >= 400:
+                from fastapi.responses import JSONResponse
+
+                return JSONResponse(status_code=resp.status_code, content=data)
+            return data
+
+    # Minimal math rollout endpoint: alternates agent/env; calls inference_url chat/completions
+    @api.post("/rollout")
+    def rollout(request: dict[str, object] = Body(...)):
+        from typing import Any
+        import json as _json
+
+        run_id = str(request.get("run_id"))
+        data = request if isinstance(request, dict) else {}
+        env = data.get("env") if isinstance(data, dict) else {}
+        policy = data.get("policy") if isinstance(data, dict) else {}
+        ops = data.get("ops") if isinstance(data, dict) else []
+        if not isinstance(ops, list):
+            ops = []
+        env_name = (env or {}).get("env_name") or "math"
+        policy_cfg = (policy or {}).get("config") or {}
+        model = policy_cfg.get("model")
+        inference_url = (policy_cfg.get("inference_url") or "").rstrip("/")
+
+        # ALWAYS derive question/answer from Hendrycks dataset using seed/subject
+        env_cfg = (env or {}).get("config") or {}
+        # Prefer env.seed; fall back to env.config.seed -> default 0
+        try:
+            seed_val = int((env or {}).get("seed")) if isinstance(env, dict) and (env or {}).get("seed") is not None else 0
+        except Exception:
+            seed_val = 0
+        if seed_val == 0:
+            try:
+                seed_val = int(env_cfg.get("seed")) if isinstance(env_cfg, dict) and env_cfg.get("seed") is not None else 0
+            except Exception:
+                seed_val = 0
+        subject = (env_cfg.get("subject") if isinstance(env_cfg, dict) else None) or os.getenv("HENDRYCKS_MATH_CONFIG", "default")
+        # Load real Hendrycks problem text/solution (download if necessary). Crash on failure.
+        qh, ah = _load_hendrycks_problem(seed_val, subject=subject)
+        question = qh
+        expected_answer = ah
+
+        def _prepare_payload(m: str | None, payload: dict[str, Any]) -> dict[str, Any]:
+            # Remove vendor-specific fields and force tool choice for math interaction
+            sanitized = dict(payload)
+            for k in ("stop_after_tool_calls", "thinking_mode", "thinking_budget", "reasoning"):
+                sanitized.pop(k, None)
+            if m and "gpt-5" in m:
+                if "max_tokens" in sanitized and "max_completion_tokens" not in sanitized:
+                    sanitized["max_completion_tokens"] = sanitized.pop("max_tokens")
+                else:
+                    sanitized.pop("max_tokens", None)
+                for field in ("temperature", "top_p"):
+                    sanitized.pop(field, None)
+            sanitized["tool_choice"] = {"type": "function", "function": {"name": "interact"}}
+            sanitized["parallel_tool_calls"] = False
+            return sanitized
+
+        def _parse_tool_actions(resp: dict[str, Any]) -> list[str]:
+            try:
+                choices = resp.get("choices")
+                if isinstance(choices, list) and choices:
+                    msg = choices[0].get("message", {}) if isinstance(choices[0], dict) else {}
+                    tcs = msg.get("tool_calls")
+                    if isinstance(tcs, list) and tcs:
+                        fn = tcs[0].get("function", {}) if isinstance(tcs[0], dict) else {}
+                        args = fn.get("arguments")
+                        obj = {}
+                        if isinstance(args, str):
+                            try:
+                                obj = _json.loads(args)
+                            except Exception:
+                                obj = {}
+                        elif isinstance(args, dict):
+                            obj = args
+                        acts = obj.get("actions")
+                        if isinstance(acts, list):
+                            return [str(a) for a in acts][:5]
+            except Exception:
+                pass
+            return []
+
+        # Build minimal context and execute ops
+        history: list[dict[str, Any]] = []
+        steps: list[dict[str, Any]] = []
+        total_reward = 0.0
+        last_llm_text: str | None = None
+        last_actions: list[str] = []
+        for op in ops or []:
+            if op == "agent":
+                user_prompt = (
+                    str(question)
+                    if isinstance(question, (str, int, float)) and str(question).strip()
+                    else "Solve the problem. Provide answer steps succinctly."
+                )
+                payload = {
+                    "model": model,
+                    "messages": [{"role": "user", "content": user_prompt}],
+                    "tools": [{
+                        "type": "function",
+                        "function": {"name": "interact", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}}}, "required": ["actions"]}},
+                    }],
+                    "max_tokens": 256,
+                    "temperature": 0.2,
+                }
+                to_send = _prepare_payload(model if isinstance(model, str) else None, payload)
+                # Print prompts and tools exposed to the model
+                try:
+                    tool_names = []
+                    for t in (payload.get("tools") or []):
+                        if isinstance(t, dict):
+                            fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
+                            name = fn.get("name")
+                            if isinstance(name, str):
+                                tool_names.append(name)
+                    print(f"[math] system: <none>", flush=True)
+                    print(f"[math] user: {user_prompt}", flush=True)
+                    print(f"[math] tools: {tool_names}", flush=True)
+                except Exception:
+                    pass
+                headers = {}
+                if "/proxy" in inference_url:
+                    sk = os.environ.get("SYNTH_API_KEY")
+                    if sk:
+                        headers["Authorization"] = f"Bearer {sk}"
+                with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
+                    resp = client.post(f"{inference_url}/v1/chat/completions", json=to_send, headers=headers)
+                    try:
+                        data = resp.json()
+                    except Exception:
+                        data = {"error": "invalid_json", "raw": resp.text[:400]}
+
+                # Extract assistant text for visibility/correctness
+                llm_text = None
+                try:
+                    _choices = data.get("choices") if isinstance(data, dict) else None
+                    if isinstance(_choices, list) and _choices:
+                        _msg = _choices[0].get("message", {}) if isinstance(_choices[0], dict) else {}
+                        if isinstance(_msg, dict):
+                            _content = _msg.get("content")
+                            if isinstance(_content, str) and _content.strip():
+                                llm_text = _content
+                except Exception:
+                    llm_text = None
+
+                # Print question, model output, and correctness if we have an expected answer
+                try:
+                    if question is not None:
+                        print(f"[math] question: {question}", flush=True)
+                    if llm_text is not None:
+                        print(f"[math] llm: {llm_text}", flush=True)
+                    if expected_answer is not None and llm_text is not None:
+                        exp = str(expected_answer).strip()
+                        got = llm_text.strip()
+                        is_correct = exp and (exp in got)
+                        print(f"[math] correct: {bool(is_correct)} (expected fragment: {exp})", flush=True)
+                except Exception:
+                    pass
+                last_llm_text = llm_text
+                acts = _parse_tool_actions(data) or []
+                last_actions = acts if isinstance(acts, list) else []
+                steps.append({"obs": {}, "tool_calls": [{"tool_name": "interact", "arguments": _json.dumps({"actions": acts})}], "reward": None, "done": False, "truncated": False, "info": None})
+                history.append({"actions": acts})
+            elif op == "env":
+                # Compute a simple correctness-based reward if expected answer available
+                reward_val = 0.0
+                try:
+                    if expected_answer is not None:
+                        # Prefer explicit tool-call answer from last_actions
+                        candidate = ""
+                        if isinstance(last_actions, list) and last_actions:
+                            # Take the last non-empty action as the final answer
+                            for s in reversed(last_actions):
+                                if isinstance(s, str) and s.strip():
+                                    candidate = s.strip()
+                                    break
+                        # Fallback to parse from llm_text if tool actions absent
+                        if not candidate and last_llm_text is not None:
+                            candidate = _extract_boxed(last_llm_text) or last_llm_text
+                        exp_raw = _extract_boxed(str(expected_answer)) or str(expected_answer)
+                        got_raw = candidate
+                        exp_n = _normalize_answer_text(exp_raw)
+                        got_n = _normalize_answer_text(got_raw)
+                        if exp_n and exp_n in got_n:
+                            reward_val = 1.0
+                except Exception:
+                    reward_val = 0.0
+                steps.append({"obs": {}, "tool_calls": [], "reward": reward_val, "done": False, "truncated": False, "info": None})
+                total_reward += float(reward_val)
+            else:
+                continue
+
+        # Compose response similar to SDK contract (simplified)
+        return {
+            "run_id": run_id,
+            "trajectories": [{"env_id": env_name, "policy_id": (policy or {}).get("policy_name") or "math-react", "steps": steps, "final": {"observation": {}}, "length": len(steps)}],
+            "branches": {},
+            "metrics": {"episode_returns": [total_reward], "mean_return": float(total_reward), "num_steps": len(steps), "num_episodes": 1},
+            "aborted": False,
+            "ops_executed": len(steps),
+        }
+
+    return api
+
+
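For orientation, here is a minimal client sketch exercising the new endpoints once the app is deployed. The base URL is a hypothetical placeholder, and the request body simply mirrors what the /task_info and /rollout handlers above consume; none of these names come from the diff itself.

    # Hypothetical client for the task app above; BASE_URL is assumed, not real.
    import httpx

    BASE_URL = "https://example--hendrycks-math-task-app.modal.run"  # placeholder deployment URL

    with httpx.Client(timeout=30.0) as client:
        # Fetch a problem and its tool schema for a given seed/subject.
        info = client.get(f"{BASE_URL}/task_info", params={"seed": 7, "subject": "algebra"}).json()
        print(info["user"])  # the problem statement

        # One "agent" op (model call) followed by one "env" op (scoring).
        result = client.post(
            f"{BASE_URL}/rollout",
            json={
                "run_id": "demo-run",
                "env": {"env_name": "math", "seed": 7, "config": {"subject": "algebra"}},
                "policy": {"policy_name": "math-react", "config": {"model": "gpt-5", "inference_url": f"{BASE_URL}/proxy"}},
                "ops": ["agent", "env"],
            },
        ).json()
        print(result["metrics"]["mean_return"])  # 1.0 when the normalized expected answer is matched

Pointing inference_url at the app's own /proxy path routes the agent step through the proxy_chat_completions handler, which requires OPENAI_API_KEY in the Modal secret.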
synth_ai/environments/examples/crafter_classic/agent_demos/crafter_modal_ft/filter_traces_sft_turso.py
CHANGED
@@ -166,17 +166,31 @@ class FinetuningDataExtractorV3:
 
     async def get_session_metrics(self, session_id: str) -> Dict[str, Any]:
         """Get metrics for a specific session."""
-        # …
-        …
-        …
-        …
-            FROM events
+        # Prefer outcome rewards table if present; fall back to environment event sums
+        outcome_query = """
+            SELECT COALESCE(MAX(total_reward), 0) as total_reward
+            FROM outcome_rewards
             WHERE session_id = :session_id
-            AND event_type = 'environment'
-            AND reward IS NOT NULL
         """
-        …
-        total_reward …
+        outcome_df = await self.db_manager.query_traces(outcome_query, {"session_id": session_id})
+        total_reward: float = 0.0
+        try:
+            if not outcome_df.empty:
+                total_reward = float(outcome_df['total_reward'].iloc[0] or 0.0)
+        except Exception:
+            total_reward = 0.0
+
+        if total_reward == 0.0:
+            # Fallback: sum environment rewards
+            reward_query = """
+                SELECT COALESCE(SUM(reward), 0) as total_reward
+                FROM events
+                WHERE session_id = :session_id
+                AND event_type = 'environment'
+                AND reward IS NOT NULL
+            """
+            reward_df = await self.db_manager.query_traces(reward_query, {"session_id": session_id})
+            total_reward = float(reward_df['total_reward'].iloc[0]) if not reward_df.empty else 0.0
 
         # Get total tokens and cost from LM events
         lm_query = """
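A rough usage sketch of the updated metrics path, assuming an already-connected FinetuningDataExtractorV3 instance (the surrounding setup is not part of this hunk): total_reward now comes from outcome_rewards when that table has a row for the session, and only falls back to summing environment-event rewards otherwise.

    # Sketch only: `extractor` is assumed to be a connected FinetuningDataExtractorV3.
    async def show_metrics(extractor, session_id: str) -> None:
        metrics = await extractor.get_session_metrics(session_id)
        # With the change above, the reward reflects MAX(total_reward) from
        # outcome_rewards when present, else SUM(reward) over environment events.
        print(session_id, metrics)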
synth_ai/environments/service/app.py
CHANGED
@@ -1,12 +1,19 @@
 import os  # Added to ensure os is available before use
 import sys
 
-# Ensure …
-# Current file: <repo>/…
-# We want to …
-…
-…
-…
+# Ensure repository root is on PYTHONPATH for dev installs
+# Current file path: <repo>/synth_ai/environments/service/app.py
+# We want sys.path to include <repo>, NOT <repo>/synth_ai to avoid shadowing stdlib 'http'
+_pkg_dir = os.path.abspath(os.path.join(os.path.dirname(__file__), "..", ".."))
+_repo_root = os.path.abspath(os.path.join(_pkg_dir, ".."))
+# If the package directory was previously added, remove it to prevent top-level shadowing
+try:
+    while _pkg_dir in sys.path:
+        sys.path.remove(_pkg_dir)
+except Exception:
+    pass
+if _repo_root not in sys.path:
+    sys.path.insert(0, _repo_root)
 
 print(f"SYS.PATH IN APP.PY: {sys.path}")
 import logging
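The intent of this hunk, illustrated standalone: if <repo>/synth_ai sits on sys.path, its http.py becomes importable as top-level `http` and shadows the standard library; putting <repo> on the path instead keeps the module reachable only as `synth_ai.http`. A minimal sketch with a hypothetical checkout path:

    import sys

    repo_root = "/path/to/repo"          # hypothetical checkout location
    pkg_dir = f"{repo_root}/synth_ai"    # on sys.path this would shadow stdlib 'http'

    while pkg_dir in sys.path:           # drop any earlier, harmful entries
        sys.path.remove(pkg_dir)
    if repo_root not in sys.path:
        sys.path.insert(0, repo_root)    # `import synth_ai.http` works; stdlib `http` is untouched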
@@ -13,8 +13,8 @@ SYNTH_BACKEND_URL = ""
 
 # Learning V2 Modal Service URLs
 LEARNING_V2_URLS = {
-    "dev": "https://synth-laboratories-dev--learning-v2-service-…
-    "prod": "https://synth-laboratories-prod--learning-v2-service-…
+    "dev": "https://synth-laboratories-dev--learning-v2-service-fastapi-app.modal.run",
+    "prod": "https://synth-laboratories-prod--learning-v2-service-fastapi-app.modal.run",
     "main": "https://synth-laboratories--learning-v2-service-fastapi-app.modal.run"
 }
 

synth_ai/http.py
CHANGED
@@ -1,102 +1,26 @@
-… (old lines 1-26: module imports and the HTTPError/AsyncHttpClient definitions, truncated in extraction)
-        self._base_url = base_url.rstrip("/")
-        self._api_key = api_key
-        self._timeout = aiohttp.ClientTimeout(total=timeout)
-        self._session: Optional[aiohttp.ClientSession] = None
-
-    async def __aenter__(self) -> "AsyncHttpClient":
-        if self._session is None:
-            headers = {"authorization": f"Bearer {self._api_key}"}
-            self._session = aiohttp.ClientSession(headers=headers, timeout=self._timeout)
-        return self
-
-    async def __aexit__(self, exc_type, exc, tb) -> None:  # noqa: ANN001
-        if self._session is not None:
-            await self._session.close()
-            self._session = None
-
-    def _abs(self, path: str) -> str:
-        if path.startswith("http://") or path.startswith("https://"):
-            return path
-        # If base_url already ends with /api and path starts with /api, remove duplicate
-        if self._base_url.endswith("/api") and path.startswith("/api"):
-            path = path[4:]  # Remove leading /api
-        return f"{self._base_url}/{path.lstrip('/')}"
-
-    async def get(self, path: str, *, params: Optional[Dict[str, Any]] = None, headers: Optional[Dict[str, str]] = None) -> Any:
-        url = self._abs(path)
-        assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
-        async with self._session.get(url, params=params, headers=headers) as resp:
-            return await self._handle_response(resp, url)
-
-    async def post_json(self, path: str, *, json: Dict[str, Any], headers: Optional[Dict[str, str]] = None) -> Any:
-        url = self._abs(path)
-        assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
-        async with self._session.post(url, json=json, headers=headers) as resp:
-            return await self._handle_response(resp, url)
-
-    async def post_multipart(self, path: str, *, data: Dict[str, Any], files: Dict[str, tuple[str, bytes, str | None]], headers: Optional[Dict[str, str]] = None) -> Any:
-        url = self._abs(path)
-        assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
-        form = aiohttp.FormData()
-        for k, v in data.items():
-            form.add_field(k, str(v))
-        for field, (filename, content, content_type) in files.items():
-            form.add_field(field, content, filename=filename, content_type=content_type or "application/octet-stream")
-        async with self._session.post(url, data=form, headers=headers) as resp:
-            return await self._handle_response(resp, url)
-
-    async def delete(self, path: str, *, headers: Optional[Dict[str, str]] = None) -> Any:
-        url = self._abs(path)
-        assert self._session is not None, "AsyncHttpClient must be used as an async context manager"
-        async with self._session.delete(url, headers=headers) as resp:
-            return await self._handle_response(resp, url)
-
-    async def _handle_response(self, resp: aiohttp.ClientResponse, url: str) -> Any:
-        text = await resp.text()
-        body_snippet = text[:200] if text else None
-        if 200 <= resp.status < 300:
-            ctype = resp.headers.get("content-type", "")
-            if "application/json" in ctype:
-                try:
-                    return await resp.json()
-                except Exception:
-                    # Fallback to text
-                    return text
-            return text
-        # error
-        detail: Any | None = None
-        try:
-            detail = await resp.json()
-        except Exception:
-            detail = None
-        raise HTTPError(status=resp.status, url=url, message="request_failed", body_snippet=body_snippet, detail=detail)
-
-
-async def sleep(seconds: float) -> None:
-    await asyncio.sleep(seconds)
+"""
+Compatibility shim to avoid shadowing Python's stdlib `http` module.
+This re-exports the actual client implementation from http_client.py and
+supports both package and script execution contexts.
+"""
+
+try:
+    from synth_ai.http_client import *  # type: ignore F401,F403
+except Exception:
+    try:
+        from .http_client import *  # type: ignore F401,F403
+    except Exception:
+        import importlib.util as _ilu
+        import sys as _sys
+        from pathlib import Path as _Path
+
+        _here = _Path(__file__).resolve()
+        _client_path = _here.parent / "http_client.py"
+        _spec = _ilu.spec_from_file_location("http_client", str(_client_path))
+        if not _spec or not _spec.loader:
+            raise ImportError("Could not load http_client module")
+        _mod = _ilu.module_from_spec(_spec)
+        _spec.loader.exec_module(_mod)
+        _sys.modules["synth_ai.http_client"] = _mod
+        for _name in ("HTTPError", "AsyncHttpClient", "sleep"):
+            globals()[_name] = getattr(_mod, _name)