synth-ai 0.2.8.dev9__py3-none-any.whl → 0.2.8.dev11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of synth-ai might be problematic. Click here for more details.
- synth_ai/demos/core/cli.py +53 -0
- synth_ai/demos/demo_task_apps/math/modal_task_app.py +124 -109
- {synth_ai-0.2.8.dev9.dist-info → synth_ai-0.2.8.dev11.dist-info}/METADATA +1 -1
- {synth_ai-0.2.8.dev9.dist-info → synth_ai-0.2.8.dev11.dist-info}/RECORD +8 -8
- {synth_ai-0.2.8.dev9.dist-info → synth_ai-0.2.8.dev11.dist-info}/WHEEL +0 -0
- {synth_ai-0.2.8.dev9.dist-info → synth_ai-0.2.8.dev11.dist-info}/entry_points.txt +0 -0
- {synth_ai-0.2.8.dev9.dist-info → synth_ai-0.2.8.dev11.dist-info}/licenses/LICENSE +0 -0
- {synth_ai-0.2.8.dev9.dist-info → synth_ai-0.2.8.dev11.dist-info}/top_level.txt +0 -0
synth_ai/demos/core/cli.py
CHANGED
|
@@ -1216,6 +1216,59 @@ def cmd_run(args: argparse.Namespace) -> int:
|
|
|
1216
1216
|
except Exception:
|
|
1217
1217
|
print(str(js))
|
|
1218
1218
|
print("Request body was:\n" + json.dumps(body, indent=2))
|
|
1219
|
+
try:
|
|
1220
|
+
auth_preview = _key_preview(env.synth_api_key or "", "SYNTH_API_KEY (auth)")
|
|
1221
|
+
print(f"[run] {auth_preview}")
|
|
1222
|
+
except Exception:
|
|
1223
|
+
pass
|
|
1224
|
+
try:
|
|
1225
|
+
data_block = body.get("data") if isinstance(body, dict) else None
|
|
1226
|
+
env_key_body = ""
|
|
1227
|
+
if isinstance(data_block, dict):
|
|
1228
|
+
env_key_body = str(data_block.get("environment_api_key") or "")
|
|
1229
|
+
if env_key_body:
|
|
1230
|
+
print(f"[run] {_key_preview(env_key_body, 'environment_api_key (body)')}")
|
|
1231
|
+
except Exception:
|
|
1232
|
+
pass
|
|
1233
|
+
try:
|
|
1234
|
+
current_env_key = env.env_api_key or ""
|
|
1235
|
+
if current_env_key:
|
|
1236
|
+
print(f"[run] {_key_preview(current_env_key, 'ENVIRONMENT_API_KEY (current)')}")
|
|
1237
|
+
except Exception:
|
|
1238
|
+
pass
|
|
1239
|
+
if isinstance(js, dict):
|
|
1240
|
+
detail = js.get("detail")
|
|
1241
|
+
if isinstance(detail, dict):
|
|
1242
|
+
try:
|
|
1243
|
+
sent_key = detail.get("sent_key")
|
|
1244
|
+
if isinstance(sent_key, str):
|
|
1245
|
+
print(f"[run] Backend detail.sent_key {_key_preview(sent_key, 'detail.sent_key')}")
|
|
1246
|
+
except Exception:
|
|
1247
|
+
pass
|
|
1248
|
+
try:
|
|
1249
|
+
sent_keys = detail.get("sent_keys")
|
|
1250
|
+
if isinstance(sent_keys, (list, tuple)):
|
|
1251
|
+
previews = []
|
|
1252
|
+
for idx, val in enumerate(sent_keys):
|
|
1253
|
+
if isinstance(val, str):
|
|
1254
|
+
previews.append(_key_preview(val, f"detail.sent_keys[{idx}]"))
|
|
1255
|
+
if previews:
|
|
1256
|
+
joined = "; ".join(previews)
|
|
1257
|
+
print(f"[run] Backend detail.sent_keys previews: {joined}")
|
|
1258
|
+
except Exception:
|
|
1259
|
+
pass
|
|
1260
|
+
try:
|
|
1261
|
+
key_prefix = detail.get("sent_key_prefix")
|
|
1262
|
+
if isinstance(key_prefix, str):
|
|
1263
|
+
print(f"[run] Backend detail.sent_key_prefix={key_prefix}")
|
|
1264
|
+
except Exception:
|
|
1265
|
+
pass
|
|
1266
|
+
try:
|
|
1267
|
+
health_url = detail.get("health_url")
|
|
1268
|
+
if isinstance(health_url, str):
|
|
1269
|
+
print(f"[run] Backend detail.health_url={health_url}")
|
|
1270
|
+
except Exception:
|
|
1271
|
+
pass
|
|
1219
1272
|
# Extra hints for auth failures
|
|
1220
1273
|
try:
|
|
1221
1274
|
sk = (env.synth_api_key or "").strip()
|
|
@@ -358,122 +358,137 @@ def fastapi_app():
|
|
|
358
358
|
pass
|
|
359
359
|
return []
|
|
360
360
|
|
|
361
|
-
#
|
|
361
|
+
# Single-step rollout: one agent call followed by evaluation of the returned tool answer
|
|
362
362
|
history: list[dict[str, Any]] = []
|
|
363
363
|
steps: list[dict[str, Any]] = []
|
|
364
364
|
total_reward = 0.0
|
|
365
|
-
last_llm_text: str | None = None
|
|
366
|
-
last_actions: list[str] = []
|
|
367
|
-
for op in ops or []:
|
|
368
|
-
if op == "agent":
|
|
369
|
-
user_prompt = (
|
|
370
|
-
str(question)
|
|
371
|
-
if isinstance(question, (str, int, float)) and str(question).strip()
|
|
372
|
-
else "Solve the problem. Provide answer steps succinctly."
|
|
373
|
-
)
|
|
374
|
-
payload = {
|
|
375
|
-
"model": model,
|
|
376
|
-
"messages": [{"role": "user", "content": user_prompt}],
|
|
377
|
-
"tools": [{
|
|
378
|
-
"type": "function",
|
|
379
|
-
"function": {"name": "interact", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}}}, "required": ["actions"]}},
|
|
380
|
-
}],
|
|
381
|
-
"max_tokens": 256,
|
|
382
|
-
"temperature": 0.2,
|
|
383
|
-
}
|
|
384
|
-
to_send = _prepare_payload(model if isinstance(model, str) else None, payload)
|
|
385
|
-
# Print prompts and tools exposed to the model
|
|
386
|
-
try:
|
|
387
|
-
tool_names = []
|
|
388
|
-
for t in (payload.get("tools") or []):
|
|
389
|
-
if isinstance(t, dict):
|
|
390
|
-
fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
|
|
391
|
-
name = fn.get("name")
|
|
392
|
-
if isinstance(name, str):
|
|
393
|
-
tool_names.append(name)
|
|
394
|
-
print(f"[math] system: <none>", flush=True)
|
|
395
|
-
print(f"[math] user: {user_prompt}", flush=True)
|
|
396
|
-
print(f"[math] tools: {tool_names}", flush=True)
|
|
397
|
-
except Exception:
|
|
398
|
-
pass
|
|
399
|
-
headers = {}
|
|
400
|
-
if "/proxy" in inference_url:
|
|
401
|
-
sk = os.environ.get("SYNTH_API_KEY")
|
|
402
|
-
if sk:
|
|
403
|
-
headers["Authorization"] = f"Bearer {sk}"
|
|
404
|
-
with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
|
|
405
|
-
resp = client.post(f"{inference_url}/v1/chat/completions", json=to_send, headers=headers)
|
|
406
|
-
try:
|
|
407
|
-
data = resp.json()
|
|
408
|
-
except Exception:
|
|
409
|
-
data = {"error": "invalid_json", "raw": resp.text[:400]}
|
|
410
|
-
|
|
411
|
-
# Extract assistant text for visibility/correctness
|
|
412
|
-
llm_text = None
|
|
413
|
-
try:
|
|
414
|
-
_choices = data.get("choices") if isinstance(data, dict) else None
|
|
415
|
-
if isinstance(_choices, list) and _choices:
|
|
416
|
-
_msg = _choices[0].get("message", {}) if isinstance(_choices[0], dict) else {}
|
|
417
|
-
if isinstance(_msg, dict):
|
|
418
|
-
_content = _msg.get("content")
|
|
419
|
-
if isinstance(_content, str) and _content.strip():
|
|
420
|
-
llm_text = _content
|
|
421
|
-
except Exception:
|
|
422
|
-
llm_text = None
|
|
423
|
-
|
|
424
|
-
# Print question, model output, and correctness if we have an expected answer
|
|
425
|
-
try:
|
|
426
|
-
if question is not None:
|
|
427
|
-
print(f"[math] question: {question}", flush=True)
|
|
428
|
-
if llm_text is not None:
|
|
429
|
-
print(f"[math] llm: {llm_text}", flush=True)
|
|
430
|
-
if expected_answer is not None and llm_text is not None:
|
|
431
|
-
exp = str(expected_answer).strip()
|
|
432
|
-
got = llm_text.strip()
|
|
433
|
-
is_correct = exp and (exp in got)
|
|
434
|
-
print(f"[math] correct: {bool(is_correct)} (expected fragment: {exp})", flush=True)
|
|
435
|
-
except Exception:
|
|
436
|
-
pass
|
|
437
|
-
last_llm_text = llm_text
|
|
438
|
-
acts = _parse_tool_actions(data) or []
|
|
439
|
-
last_actions = acts if isinstance(acts, list) else []
|
|
440
|
-
steps.append({"obs": {}, "tool_calls": [{"tool_name": "interact", "arguments": _json.dumps({"actions": acts})}], "reward": None, "done": False, "truncated": False, "info": None})
|
|
441
|
-
history.append({"actions": acts})
|
|
442
|
-
elif op == "env":
|
|
443
|
-
# Compute a simple correctness-based reward if expected answer available
|
|
444
|
-
reward_val = 0.0
|
|
445
|
-
try:
|
|
446
|
-
if expected_answer is not None:
|
|
447
|
-
# Prefer explicit tool-call answer from last_actions
|
|
448
|
-
candidate = ""
|
|
449
|
-
if isinstance(last_actions, list) and last_actions:
|
|
450
|
-
# Take the last non-empty action as the final answer
|
|
451
|
-
for s in reversed(last_actions):
|
|
452
|
-
if isinstance(s, str) and s.strip():
|
|
453
|
-
candidate = s.strip()
|
|
454
|
-
break
|
|
455
|
-
# Fallback to parse from llm_text if tool actions absent
|
|
456
|
-
if not candidate and last_llm_text is not None:
|
|
457
|
-
candidate = _extract_boxed(last_llm_text) or last_llm_text
|
|
458
|
-
exp_raw = _extract_boxed(str(expected_answer)) or str(expected_answer)
|
|
459
|
-
got_raw = candidate
|
|
460
|
-
exp_n = _normalize_answer_text(exp_raw)
|
|
461
|
-
got_n = _normalize_answer_text(got_raw)
|
|
462
|
-
if exp_n and exp_n in got_n:
|
|
463
|
-
reward_val = 1.0
|
|
464
|
-
except Exception:
|
|
465
|
-
reward_val = 0.0
|
|
466
|
-
steps.append({"obs": {}, "tool_calls": [], "reward": reward_val, "done": False, "truncated": False, "info": None})
|
|
467
|
-
total_reward += float(reward_val)
|
|
468
|
-
else:
|
|
469
|
-
continue
|
|
470
365
|
|
|
471
|
-
|
|
366
|
+
user_prompt = (
|
|
367
|
+
str(question)
|
|
368
|
+
if isinstance(question, (str, int, float)) and str(question).strip()
|
|
369
|
+
else "Solve the problem. Provide answer steps succinctly."
|
|
370
|
+
)
|
|
371
|
+
payload = {
|
|
372
|
+
"model": model,
|
|
373
|
+
"messages": [{"role": "user", "content": user_prompt}],
|
|
374
|
+
"tools": [{
|
|
375
|
+
"type": "function",
|
|
376
|
+
"function": {"name": "interact", "parameters": {"type": "object", "properties": {"actions": {"type": "array", "items": {"type": "string"}}}, "required": ["actions"]}},
|
|
377
|
+
}],
|
|
378
|
+
"max_tokens": 256,
|
|
379
|
+
"temperature": 0.2,
|
|
380
|
+
}
|
|
381
|
+
to_send = _prepare_payload(model if isinstance(model, str) else None, payload)
|
|
382
|
+
|
|
383
|
+
try:
|
|
384
|
+
tool_names = []
|
|
385
|
+
for t in (payload.get("tools") or []):
|
|
386
|
+
if isinstance(t, dict):
|
|
387
|
+
fn = (t.get("function") or {}) if isinstance(t.get("function"), dict) else {}
|
|
388
|
+
name = fn.get("name")
|
|
389
|
+
if isinstance(name, str):
|
|
390
|
+
tool_names.append(name)
|
|
391
|
+
print(f"[math] system: <none>", flush=True)
|
|
392
|
+
print(f"[math] user: {user_prompt}", flush=True)
|
|
393
|
+
print(f"[math] tools: {tool_names}", flush=True)
|
|
394
|
+
except Exception:
|
|
395
|
+
pass
|
|
396
|
+
|
|
397
|
+
headers = {}
|
|
398
|
+
if "/proxy" in inference_url:
|
|
399
|
+
sk = os.environ.get("SYNTH_API_KEY")
|
|
400
|
+
if sk:
|
|
401
|
+
headers["Authorization"] = f"Bearer {sk}"
|
|
402
|
+
with httpx.Client(timeout=httpx.Timeout(180.0), follow_redirects=True) as client:
|
|
403
|
+
resp = client.post(f"{inference_url}/v1/chat/completions", json=to_send, headers=headers)
|
|
404
|
+
try:
|
|
405
|
+
data = resp.json()
|
|
406
|
+
except Exception:
|
|
407
|
+
data = {"error": "invalid_json", "raw": resp.text[:400]}
|
|
408
|
+
|
|
409
|
+
llm_text = None
|
|
410
|
+
try:
|
|
411
|
+
_choices = data.get("choices") if isinstance(data, dict) else None
|
|
412
|
+
if isinstance(_choices, list) and _choices:
|
|
413
|
+
_msg = _choices[0].get("message", {}) if isinstance(_choices[0], dict) else {}
|
|
414
|
+
if isinstance(_msg, dict):
|
|
415
|
+
_content = _msg.get("content")
|
|
416
|
+
if isinstance(_content, str) and _content.strip():
|
|
417
|
+
llm_text = _content
|
|
418
|
+
except Exception:
|
|
419
|
+
llm_text = None
|
|
420
|
+
|
|
421
|
+
try:
|
|
422
|
+
if question is not None:
|
|
423
|
+
print(f"[math] question: {question}", flush=True)
|
|
424
|
+
if llm_text is not None:
|
|
425
|
+
print(f"[math] llm: {llm_text}", flush=True)
|
|
426
|
+
if expected_answer is not None and llm_text is not None:
|
|
427
|
+
exp = str(expected_answer).strip()
|
|
428
|
+
got = llm_text.strip()
|
|
429
|
+
is_correct = exp and (exp in got)
|
|
430
|
+
print(f"[math] correct: {bool(is_correct)} (expected fragment: {exp})", flush=True)
|
|
431
|
+
except Exception:
|
|
432
|
+
pass
|
|
433
|
+
|
|
434
|
+
tool_actions = _parse_tool_actions(data)
|
|
435
|
+
history.append({"actions": tool_actions})
|
|
436
|
+
steps.append({
|
|
437
|
+
"obs": {},
|
|
438
|
+
"tool_calls": [{"tool_name": "interact", "arguments": _json.dumps({"actions": tool_actions})}],
|
|
439
|
+
"reward": None,
|
|
440
|
+
"done": False,
|
|
441
|
+
"truncated": False,
|
|
442
|
+
"info": None,
|
|
443
|
+
})
|
|
444
|
+
|
|
445
|
+
# Evaluate answer correctness using tool output (or fall back to assistant text)
|
|
446
|
+
reward_val = 0.0
|
|
447
|
+
candidate = ""
|
|
448
|
+
try:
|
|
449
|
+
if isinstance(tool_actions, list):
|
|
450
|
+
for s in reversed(tool_actions):
|
|
451
|
+
if isinstance(s, str) and s.strip():
|
|
452
|
+
candidate = s.strip()
|
|
453
|
+
break
|
|
454
|
+
if not candidate and llm_text is not None:
|
|
455
|
+
candidate = _extract_boxed(llm_text) or llm_text
|
|
456
|
+
if expected_answer is not None:
|
|
457
|
+
exp_raw = _extract_boxed(str(expected_answer)) or str(expected_answer)
|
|
458
|
+
got_raw = candidate
|
|
459
|
+
exp_n = _normalize_answer_text(exp_raw)
|
|
460
|
+
got_n = _normalize_answer_text(got_raw)
|
|
461
|
+
if exp_n and exp_n in got_n:
|
|
462
|
+
reward_val = 1.0
|
|
463
|
+
except Exception:
|
|
464
|
+
reward_val = 0.0
|
|
465
|
+
|
|
466
|
+
total_reward += float(reward_val)
|
|
467
|
+
steps.append({
|
|
468
|
+
"obs": {},
|
|
469
|
+
"tool_calls": [],
|
|
470
|
+
"reward": reward_val,
|
|
471
|
+
"done": True,
|
|
472
|
+
"truncated": False,
|
|
473
|
+
"info": None,
|
|
474
|
+
})
|
|
475
|
+
|
|
472
476
|
return {
|
|
473
477
|
"run_id": run_id,
|
|
474
|
-
"trajectories": [{
|
|
478
|
+
"trajectories": [{
|
|
479
|
+
"env_id": env_name,
|
|
480
|
+
"policy_id": (policy or {}).get("policy_name") or "math-react",
|
|
481
|
+
"steps": steps,
|
|
482
|
+
"final": {"observation": {}},
|
|
483
|
+
"length": len(steps),
|
|
484
|
+
}],
|
|
475
485
|
"branches": {},
|
|
476
|
-
"metrics": {
|
|
486
|
+
"metrics": {
|
|
487
|
+
"episode_returns": [total_reward],
|
|
488
|
+
"mean_return": float(total_reward),
|
|
489
|
+
"num_steps": len(steps),
|
|
490
|
+
"num_episodes": 1,
|
|
491
|
+
},
|
|
477
492
|
"aborted": False,
|
|
478
493
|
"ops_executed": len(steps),
|
|
479
494
|
}
|
|
@@ -21,7 +21,7 @@ synth_ai/config/base_url.py,sha256=c85LaABBrvsl8Fp8KH0LNtJJrpnUwlzA5Ywbuth8fHE,3
|
|
|
21
21
|
synth_ai/core/experiment.py,sha256=hLkPtzUFA7iY3-QpeJ5K8YjvQeyfqnjab5P2CFaojys,236
|
|
22
22
|
synth_ai/core/system.py,sha256=s-Z7np2ISYmYc1r9YN-y2yb3cgRlOalrh0iaqnxeo84,206
|
|
23
23
|
synth_ai/demos/core/__init__.py,sha256=A2FjhY7KXGtyzdQXqeTPCkEhHfrH-eQg6bvP8HaYhZM,36
|
|
24
|
-
synth_ai/demos/core/cli.py,sha256=
|
|
24
|
+
synth_ai/demos/core/cli.py,sha256=Qak_hShq0kphuKxnLqCcAvSVTFqJ2sr8ZaH2MgXveKY,56422
|
|
25
25
|
synth_ai/demos/demo_task_apps/__init__.py,sha256=8aUGEGpWUw11GRb3wQXRAmQ99yjAt5qd5FCTDJpXWjI,44
|
|
26
26
|
synth_ai/demos/demo_task_apps/core.py,sha256=Eu7gp0VtZ9tE1HPLG14-pkjC1cD_7brsdl2IRbdSBts,14764
|
|
27
27
|
synth_ai/demos/demo_task_apps/math/__init__.py,sha256=WBzpZwSn7pRarBmhopQi34i9bEm05-71eM3siboOavY,43
|
|
@@ -30,7 +30,7 @@ synth_ai/demos/demo_task_apps/math/app.py,sha256=gNopoAhwM0vzdKuCa7AwQqSwiV2xagr
|
|
|
30
30
|
synth_ai/demos/demo_task_apps/math/config.toml,sha256=Kxrzuyj7Az5mvzXaipPIyngKTDqphohf6uSWOHCF5cw,2105
|
|
31
31
|
synth_ai/demos/demo_task_apps/math/deploy_modal.py,sha256=O4745sFuGEZTsygl-mz6ZOFJ7mog8CquXMgMyjFKr_c,2288
|
|
32
32
|
synth_ai/demos/demo_task_apps/math/deploy_task_app.sh,sha256=qVffbAmsiCAxzFDzcxNVF4f7yyLWnmqPc1cNydHT5BQ,791
|
|
33
|
-
synth_ai/demos/demo_task_apps/math/modal_task_app.py,sha256=
|
|
33
|
+
synth_ai/demos/demo_task_apps/math/modal_task_app.py,sha256=eJtNUZvO0foucqQ5-jHOygu3VaffrUEHn9jd1_9ncdM,20112
|
|
34
34
|
synth_ai/environments/__init__.py,sha256=BQW0Nc_BFQq_N-pcqTyJVjW56kSEXu7XZyaSer-U95Q,1032
|
|
35
35
|
synth_ai/environments/environment/__init__.py,sha256=EBol9AKxPTIPXWcbH9Tja-l3yL-N2kB8e5atyf6F66c,31
|
|
36
36
|
synth_ai/environments/environment/core.py,sha256=0jd0CZ88_s_qqA3d1lOgVsnv-ucw_1lJDAIUj1gTSt0,2201
|
|
@@ -412,9 +412,9 @@ synth_ai/v0/tracing_v1/events/manage.py,sha256=ZDXXP-ZwLH9LCsmw7Ru9o55d7bl_diPtJ
|
|
|
412
412
|
synth_ai/v0/tracing_v1/events/scope.py,sha256=BuBkhSpVHUJt8iGT9HJZF82rbb88mQcd2vM2shg-w2I,2550
|
|
413
413
|
synth_ai/v0/tracing_v1/events/store.py,sha256=0342lvAcalyJbVEIzQFaPuMQGgwiFm7M5rE6gr-G0E8,9041
|
|
414
414
|
synth_ai/zyk/__init__.py,sha256=htVLnzTYQ5rxzYpzSYBm7_o6uNKZ3pB_PrqkBrgTRS4,771
|
|
415
|
-
synth_ai-0.2.8.
|
|
416
|
-
synth_ai-0.2.8.
|
|
417
|
-
synth_ai-0.2.8.
|
|
418
|
-
synth_ai-0.2.8.
|
|
419
|
-
synth_ai-0.2.8.
|
|
420
|
-
synth_ai-0.2.8.
|
|
415
|
+
synth_ai-0.2.8.dev11.dist-info/licenses/LICENSE,sha256=ynhjRQUfqA_RdGRATApfFA_fBAy9cno04sLtLUqxVFM,1069
|
|
416
|
+
synth_ai-0.2.8.dev11.dist-info/METADATA,sha256=ibi9UBO3d-6_HewGfYvGBR01F-HxD5G-lYV-on2Gp8s,5153
|
|
417
|
+
synth_ai-0.2.8.dev11.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
|
|
418
|
+
synth_ai-0.2.8.dev11.dist-info/entry_points.txt,sha256=Neq-3bT7TAijjgOIR77pKL-WYg6TWBDeO8pp_nL4vGY,91
|
|
419
|
+
synth_ai-0.2.8.dev11.dist-info/top_level.txt,sha256=fBmtZyVHuKaGa29oHBaaUkrUIWTqSpoVMPiVdCDP3k8,9
|
|
420
|
+
synth_ai-0.2.8.dev11.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|