synth-ai 0.2.14__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (236) hide show
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +9 -9
  4. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  5. examples/multi_step/convert_traces_to_sft.py +84 -0
  6. examples/multi_step/run_sft_qwen30b.sh +45 -0
  7. examples/qwen_coder/configs/coder_lora_30b.toml +2 -1
  8. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  9. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  10. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  11. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  12. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  13. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  14. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  15. examples/qwen_vl/QUICKSTART.md +327 -0
  16. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  17. examples/qwen_vl/README.md +154 -0
  18. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  19. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  20. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  21. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  22. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  23. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  24. examples/qwen_vl/__init__.py +2 -0
  25. examples/qwen_vl/collect_data_via_cli.md +423 -0
  26. examples/qwen_vl/collect_vision_traces.py +368 -0
  27. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  28. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  29. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  30. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  31. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  32. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  33. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  34. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  35. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  36. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  37. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  38. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  39. examples/qwen_vl/run_vision_comparison.sh +62 -0
  40. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  41. examples/qwen_vl/test_image_validation.py +201 -0
  42. examples/qwen_vl/test_sft_vision_data.py +110 -0
  43. examples/rl/README.md +1 -1
  44. examples/rl/configs/eval_base_qwen.toml +17 -0
  45. examples/rl/configs/eval_rl_qwen.toml +13 -0
  46. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  47. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  48. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  49. examples/rl/run_eval.py +436 -0
  50. examples/rl/run_rl_and_save.py +111 -0
  51. examples/rl/task_app/README.md +22 -0
  52. examples/rl/task_app/math_single_step.py +990 -0
  53. examples/rl/task_app/math_task_app.py +111 -0
  54. examples/sft/README.md +5 -5
  55. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  56. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  57. examples/sft/evaluate.py +2 -4
  58. examples/sft/export_dataset.py +7 -4
  59. examples/swe/task_app/README.md +1 -1
  60. examples/swe/task_app/grpo_swe_mini.py +0 -1
  61. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  62. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  63. examples/swe/task_app/hosted/policy_routes.py +0 -2
  64. examples/swe/task_app/hosted/rollout.py +0 -8
  65. examples/task_apps/crafter/task_app/grpo_crafter.py +4 -7
  66. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +59 -1
  67. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +30 -0
  68. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +62 -31
  69. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +16 -14
  70. examples/task_apps/enron/__init__.py +1 -0
  71. examples/vlm/README.md +3 -3
  72. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  73. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  74. examples/vlm/filter_image_rows.py +1 -1
  75. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  76. examples/warming_up_to_rl/_utils.py +92 -0
  77. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  78. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  79. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  80. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  81. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  82. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  83. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  84. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  85. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  86. examples/warming_up_to_rl/readme.md +63 -132
  87. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  88. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  89. examples/warming_up_to_rl/task_app/README.md +42 -0
  90. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  91. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  92. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  93. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  94. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  95. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  96. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  97. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  98. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  99. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  100. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  101. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  102. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  103. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  104. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  105. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  106. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  107. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  108. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  109. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  110. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  111. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  112. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  113. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  114. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  115. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  116. synth_ai/__init__.py +44 -30
  117. synth_ai/_utils/__init__.py +47 -0
  118. synth_ai/_utils/base_url.py +10 -0
  119. synth_ai/_utils/http.py +10 -0
  120. synth_ai/_utils/prompts.py +10 -0
  121. synth_ai/_utils/task_app_state.py +12 -0
  122. synth_ai/_utils/user_config.py +10 -0
  123. synth_ai/api/models/supported.py +144 -7
  124. synth_ai/api/train/__init__.py +13 -1
  125. synth_ai/api/train/cli.py +30 -7
  126. synth_ai/api/train/config_finder.py +18 -11
  127. synth_ai/api/train/env_resolver.py +13 -10
  128. synth_ai/cli/__init__.py +62 -78
  129. synth_ai/cli/_modal_wrapper.py +7 -5
  130. synth_ai/cli/_typer_patch.py +0 -2
  131. synth_ai/cli/_validate_task_app.py +22 -4
  132. synth_ai/cli/legacy_root_backup.py +3 -1
  133. synth_ai/cli/lib/__init__.py +10 -0
  134. synth_ai/cli/lib/task_app_discovery.py +7 -0
  135. synth_ai/cli/lib/task_app_env.py +518 -0
  136. synth_ai/cli/recent.py +2 -1
  137. synth_ai/cli/setup.py +266 -0
  138. synth_ai/cli/status.py +1 -1
  139. synth_ai/cli/task_app_deploy.py +16 -0
  140. synth_ai/cli/task_app_list.py +25 -0
  141. synth_ai/cli/task_app_modal_serve.py +16 -0
  142. synth_ai/cli/task_app_serve.py +18 -0
  143. synth_ai/cli/task_apps.py +71 -31
  144. synth_ai/cli/traces.py +1 -1
  145. synth_ai/cli/train.py +18 -0
  146. synth_ai/cli/tui.py +7 -2
  147. synth_ai/cli/turso.py +1 -1
  148. synth_ai/cli/watch.py +1 -1
  149. synth_ai/demos/__init__.py +10 -0
  150. synth_ai/demos/core/__init__.py +28 -1
  151. synth_ai/demos/crafter/__init__.py +1 -0
  152. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  153. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  154. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  155. synth_ai/demos/demo_registry.py +176 -0
  156. synth_ai/demos/math/__init__.py +1 -0
  157. synth_ai/demos/math/_common.py +16 -0
  158. synth_ai/demos/math/app.py +38 -0
  159. synth_ai/demos/math/config.toml +76 -0
  160. synth_ai/demos/math/deploy_modal.py +54 -0
  161. synth_ai/demos/math/modal_task_app.py +702 -0
  162. synth_ai/demos/math/task_app_entry.py +51 -0
  163. synth_ai/environments/environment/core.py +7 -1
  164. synth_ai/environments/examples/bandit/engine.py +0 -1
  165. synth_ai/environments/examples/bandit/environment.py +0 -1
  166. synth_ai/environments/examples/wordle/environment.py +0 -1
  167. synth_ai/evals/base.py +16 -5
  168. synth_ai/evals/client.py +1 -1
  169. synth_ai/inference/client.py +1 -1
  170. synth_ai/judge_schemas.py +8 -8
  171. synth_ai/learning/client.py +1 -1
  172. synth_ai/learning/health.py +1 -1
  173. synth_ai/learning/jobs.py +1 -1
  174. synth_ai/learning/rl/client.py +1 -1
  175. synth_ai/learning/rl/env_keys.py +1 -1
  176. synth_ai/learning/rl/secrets.py +1 -1
  177. synth_ai/learning/sft/client.py +1 -1
  178. synth_ai/learning/sft/data.py +407 -4
  179. synth_ai/learning/validators.py +4 -1
  180. synth_ai/task/apps/__init__.py +4 -2
  181. synth_ai/task/config.py +6 -4
  182. synth_ai/task/rubrics/__init__.py +1 -2
  183. synth_ai/task/rubrics/loaders.py +14 -10
  184. synth_ai/task/rubrics.py +219 -0
  185. synth_ai/task/trace_correlation_helpers.py +24 -11
  186. synth_ai/task/tracing_utils.py +14 -3
  187. synth_ai/task/validators.py +2 -3
  188. synth_ai/tracing_v3/abstractions.py +3 -3
  189. synth_ai/tracing_v3/config.py +15 -13
  190. synth_ai/tracing_v3/constants.py +21 -0
  191. synth_ai/tracing_v3/db_config.py +3 -1
  192. synth_ai/tracing_v3/decorators.py +10 -7
  193. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  194. synth_ai/tracing_v3/session_tracer.py +7 -7
  195. synth_ai/tracing_v3/storage/base.py +29 -29
  196. synth_ai/tracing_v3/storage/config.py +3 -3
  197. synth_ai/tracing_v3/turso/daemon.py +8 -9
  198. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  199. synth_ai/tracing_v3/utils.py +2 -2
  200. synth_ai/tui/cli/query_experiments.py +4 -4
  201. synth_ai/tui/cli/query_experiments_v3.py +4 -4
  202. synth_ai/tui/dashboard.py +14 -9
  203. synth_ai/utils/__init__.py +101 -0
  204. synth_ai/utils/base_url.py +94 -0
  205. synth_ai/utils/cli.py +131 -0
  206. synth_ai/utils/env.py +287 -0
  207. synth_ai/utils/http.py +169 -0
  208. synth_ai/utils/modal.py +308 -0
  209. synth_ai/utils/process.py +212 -0
  210. synth_ai/utils/prompts.py +39 -0
  211. synth_ai/utils/sqld.py +122 -0
  212. synth_ai/utils/task_app_discovery.py +882 -0
  213. synth_ai/utils/task_app_env.py +186 -0
  214. synth_ai/utils/task_app_state.py +318 -0
  215. synth_ai/utils/user_config.py +137 -0
  216. synth_ai/v0/config/__init__.py +1 -5
  217. synth_ai/v0/config/base_url.py +1 -7
  218. synth_ai/v0/tracing/config.py +1 -1
  219. synth_ai/v0/tracing/decorators.py +1 -1
  220. synth_ai/v0/tracing/upload.py +1 -1
  221. synth_ai/v0/tracing_v1/config.py +1 -1
  222. synth_ai/v0/tracing_v1/decorators.py +1 -1
  223. synth_ai/v0/tracing_v1/upload.py +1 -1
  224. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  225. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/RECORD +229 -117
  226. synth_ai/cli/man.py +0 -106
  227. synth_ai/compound/cais.py +0 -0
  228. synth_ai/core/experiment.py +0 -13
  229. synth_ai/core/system.py +0 -15
  230. synth_ai/demo_registry.py +0 -295
  231. synth_ai/handshake.py +0 -109
  232. synth_ai/http.py +0 -26
  233. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  234. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  235. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  236. {synth_ai-0.2.14.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
synth_ai/cli/setup.py ADDED
@@ -0,0 +1,266 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import os
5
+ import time
6
+ import webbrowser
7
+ from pathlib import Path
8
+ from typing import Any, cast
9
+ from urllib.parse import urljoin, urlsplit, urlunsplit
10
+
11
+ import requests
12
+ from click.exceptions import Exit
13
+ from synth_ai.demos import core as demo_core
14
+ from synth_ai.utils.cli import print_next_step
15
+ from synth_ai.utils.env import mask_str
16
+ from synth_ai.utils.modal import is_modal_public_url
17
+ from synth_ai.utils.process import popen_capture
18
+ from synth_ai.utils.user_config import USER_CONFIG_PATH, update_user_config
19
+
20
+
21
class HandshakeError(Exception):
    """Raised when the browser-based SDK handshake cannot be completed."""
23
+
24
+
25
+ def _get_canonical_origin() -> str:
26
+ """Resolve the dashboard origin for the browser handshake.
27
+
28
+ Priority order:
29
+ 1. Explicit ``SYNTH_CANONICAL_ORIGIN`` override.
30
+ 2. Development flag ``SYNTH_CANONICAL_DEV`` (case-insensitive truthy) → localhost.
31
+ 3. Production dashboard at ``https://www.usesynth.ai/dashboard``.
32
+ """
33
+
34
+ override = (os.getenv("SYNTH_CANONICAL_ORIGIN") or "").strip()
35
+ if override:
36
+ return override.rstrip("/")
37
+
38
+ dev_flag = (os.getenv("SYNTH_CANONICAL_DEV") or "").strip().lower()
39
+ if dev_flag in { "1", "true", "yes", "on" }:
40
+ print("USING DEV ORIGIN")
41
+ return "http://localhost:3000"
42
+
43
+ return "https://www.usesynth.ai/dashboard"
44
+
45
+
46
+ def _split_origin(origin: str) -> tuple[str, str]:
47
+ parsed = urlsplit(origin)
48
+ bare = cast(str, urlunsplit((parsed.scheme, parsed.netloc, "", "", "")))
49
+ path = parsed.path.rstrip("/")
50
+ return bare, path
51
+
52
+
53
+ def _ensure_verification_uri(data: dict[str, Any], base_with_path: str) -> None:
54
+ uri = data.get("verification_uri")
55
+ if not isinstance(uri, str) or not uri:
56
+ return
57
+ if uri.startswith("http://") or uri.startswith("https://"):
58
+ return
59
+ data["verification_uri"] = urljoin(base_with_path.rstrip("/") + "/", uri.lstrip("/"))
60
+
61
+
62
def _start_handshake_session(origin: str | None = None) -> tuple[str, str, int, int]:
    """Open a handshake session and return its polling parameters.

    POSTs to ``<scheme://host>/api/sdk/handshake/init`` derived from *origin*
    (or from ``_get_canonical_origin()`` when *origin* is falsy).

    Returns:
        ``(device_code, verification_uri, expires_in, interval)``;
        ``expires_in`` defaults to 600 and ``interval`` to 3 when the
        response omits them.

    Raises:
        HandshakeError: on a non-200 response, malformed JSON, a payload that
            is not a JSON object, a missing ``device_code`` /
            ``verification_uri``, or non-numeric timing fields.

    Exceptions raised by ``requests.post`` itself are not intercepted here.
    """
    base = (origin or _get_canonical_origin()).rstrip("/")
    api_origin, _ = _split_origin(base)
    url = urljoin(api_origin.rstrip("/") + "/", "api/sdk/handshake/init")
    r = requests.post(url, timeout=10)
    if r.status_code != 200:
        raise HandshakeError(f"init failed: {r.status_code} {r.text}")
    try:
        data = r.json()
    except ValueError as exc:  # pragma: no cover - network dependent
        raise HandshakeError(f"init returned malformed JSON: {exc}") from exc
    if not isinstance(data, dict):
        raise HandshakeError(
            f"init returned unexpected payload type: {type(data).__name__}"
        )
    _ensure_verification_uri(data, base)

    device_code = data.get("device_code")
    verification_uri = data.get("verification_uri")
    # str(None) would yield the literal "None", which would then be polled
    # and opened in the browser; fail fast instead.
    if not device_code or not verification_uri:
        raise HandshakeError("init response is missing device_code or verification_uri")

    try:
        expires_in = int(data.get("expires_in", 600))
        interval = int(data.get("interval", 3))
    except (TypeError, ValueError) as exc:
        raise HandshakeError(f"init returned invalid timing fields: {exc}") from exc

    return str(device_code), str(verification_uri), expires_in, interval
80
+
81
+
82
def _poll_handshake_token(
    device_code: str,
    origin: str | None = None,
    *,
    timeout_s: int | None = None,
    interval_s: float = 2,
) -> dict[str, Any]:
    """Poll the handshake token endpoint until it yields a payload.

    POSTs ``{"device_code": ...}`` to ``<scheme://host>/api/sdk/handshake/token``
    derived from *origin* (or ``_get_canonical_origin()`` when falsy).

    Args:
        device_code: Code obtained from the init endpoint.
        origin: Optional origin override.
        timeout_s: Overall budget in seconds; a falsy value means 600.
        interval_s: Delay between polls in seconds; non-positive values fall
            back to 2.

    Returns:
        The decoded JSON object from the first 200 response, with any
        relative ``verification_uri`` made absolute.

    Raises:
        HandshakeError: on timeout, on a 404/410 response, or when a 200
            response carries malformed or non-object JSON.
    """
    base = (origin or _get_canonical_origin()).rstrip("/")
    api_origin, _ = _split_origin(base)
    url = urljoin(api_origin.rstrip("/") + "/", "api/sdk/handshake/token")
    wait_s = interval_s if interval_s > 0 else 2
    # Monotonic clock: the deadline must not move if the wall clock is adjusted.
    deadline = time.monotonic() + (timeout_s or 600)
    while True:
        if time.monotonic() > deadline:
            raise HandshakeError("handshake timed out")
        try:
            r = requests.post(url, json={"device_code": device_code}, timeout=10)
        except requests.RequestException:
            # Transient network failure: keep polling until the deadline.
            time.sleep(wait_s)
            continue
        if r.status_code == 200:
            try:
                data = r.json()
            except ValueError as exc:  # pragma: no cover - network dependent
                raise HandshakeError(f"token returned malformed JSON: {exc}") from exc
            if not isinstance(data, dict):
                raise HandshakeError(
                    f"token returned unexpected payload type: {type(data).__name__}"
                )
            _ensure_verification_uri(data, base)
            return data
        if r.status_code in (404, 410):
            raise HandshakeError(f"handshake failed: {r.status_code}")
        # 428 authorization_pending or others → wait and retry
        time.sleep(wait_s)


def _run_handshake(origin: str | None = None) -> dict[str, Any]:
    """Run the full browser handshake and return the token payload.

    Starts a session, makes a best-effort attempt to open the verification
    page in the user's browser, then polls for the token using the expiry and
    polling interval reported by the init endpoint.
    """
    device_code, verification_uri, expires_in, interval = _start_handshake_session(origin)
    # Failing to launch a browser must not abort the handshake.
    with contextlib.suppress(Exception):
        webbrowser.open(verification_uri)
    return _poll_handshake_token(
        device_code, origin, timeout_s=expires_in, interval_s=interval
    )
115
+
116
+
117
+
118
def setup() -> int:
    """Interactively configure Synth credentials for the demo workflow.

    Steps, in order:
    1. Change into the demo directory taken from ``DEMO_DIR`` or, failing
       that, from ``demo_core.load_demo_dir()``.
    2. Try the browser handshake to obtain the Synth and environment API
       keys; on any failure fall back to prompting on stdin.
    3. Persist both keys via ``update_user_config`` and export them into
       ``os.environ``.
    4. Report Modal auth status, refresh the task-app URL from the Modal CLI
       when the stored one is missing or not a Modal public URL, and run
       health checks against the configured endpoints.

    Returns:
        ``0`` on success; ``1`` when the user cancels a prompt or leaves a
        required key empty.
    """
    # Prefer the demo directory provided in the current shell session, then fall back to persisted state
    demo_dir_env = (os.environ.get("DEMO_DIR") or "").strip()
    demo_dir: str | None = None
    if demo_dir_env:
        candidate = Path(demo_dir_env).expanduser()
        if candidate.is_dir():
            demo_dir = str(candidate.resolve())
        else:
            print(f"Warning: DEMO_DIR={demo_dir_env} does not exist; falling back to stored demo directory.")

    if demo_dir is None:
        loaded = demo_core.load_demo_dir()
        if loaded:
            demo_dir = loaded

    if demo_dir and os.path.isdir(demo_dir):
        os.chdir(demo_dir)
        print(f"Using demo directory: {demo_dir}")

    synth_key = ""
    rl_env_key = ""
    org_name = ""

    try:
        print("\n⏳ Connecting to your browser session…")
        res = _run_handshake()
        org = res.get("org") or {}
        keys = res.get("keys") or {}
        synth_key = str(keys.get("synth") or "").strip()
        rl_env_key = str(keys.get("rl_env") or "").strip()
        org_name = org.get("name") or "Unnamed Organization ™️"
        print(f"✅ Connected to {org_name}!")
    except Exception as exc:
        # Deliberately broad: HandshakeError, network errors and malformed
        # payloads all degrade to manual key entry rather than aborting.
        print(f"⚠️  Failed to fetch keys from frontend: {exc}")
        print("Falling back to manual entry...")

    if not synth_key:
        try:
            synth_key = input(
                "Failed to fetch your Synth API key. Please enter your Synth API key here:\n> "
            ).strip()
        except (EOFError, KeyboardInterrupt):
            print("\nSetup cancelled.")
            return 1
        if not synth_key:
            print("Synth API key is required.")
            return 1

    if not rl_env_key:
        try:
            rl_env_key = input(
                "Failed to fetch your Environment API key. Please enter your Environment API key here:\n> "
            ).strip()
        except (EOFError, KeyboardInterrupt):
            print("\nSetup cancelled.")
            return 1
        if not rl_env_key:
            print("Environment API key is required.")
            return 1

    # Persist keys to user config
    config_updates = {
        "SYNTH_API_KEY": synth_key,
        "ENVIRONMENT_API_KEY": rl_env_key,
    }
    update_user_config(config_updates)

    os.environ["SYNTH_API_KEY"] = synth_key
    os.environ["ENVIRONMENT_API_KEY"] = rl_env_key

    env = demo_core.load_env()

    def _refresh_env() -> None:
        # Reload the demo environment after values were persisted.
        nonlocal env
        env = demo_core.load_env()

    def _maybe_fix_task_url() -> None:
        # Ask the Modal CLI for the app URL when the stored one is missing or
        # is not a Modal public URL.
        if not env.task_app_name:
            return
        current = env.task_app_base_url
        needs_lookup = not current or not is_modal_public_url(current)
        if not needs_lookup:
            return
        code, out = popen_capture(
            [
                "uv",
                "run",
                "python",
                "-m",
                "modal",
                "app",
                "url",
                env.task_app_name,
            ]
        )
        if code != 0 or not out:
            return
        new_url = ""
        # Take the first whitespace-separated token that is a Modal public URL.
        for token in out.split():
            if is_modal_public_url(token):
                new_url = token.strip().rstrip("/")
                break
        if new_url and new_url != current:
            print(f"Updating TASK_APP_BASE_URL from Modal CLI → {new_url}")
            persist_path = demo_dir or os.getcwd()
            demo_core.persist_task_url(new_url, name=env.task_app_name, path=persist_path)
            os.environ["TASK_APP_BASE_URL"] = new_url
            _refresh_env()

    modal_ok, modal_msg = demo_core.modal_auth_status()
    if modal_ok:
        print(f"✓ Modal authenticated: {modal_msg}")
    else:
        print(f"[setup] Modal authentication status: {modal_msg}")

    _maybe_fix_task_url()

    if env.dev_backend_url:
        api = env.dev_backend_url.rstrip("/") + (
            "" if env.dev_backend_url.endswith("/api") else "/api"
        )
        demo_core.assert_http_ok(api + "/health", method="GET")
    if env.task_app_base_url:
        base = env.task_app_base_url.rstrip("/")
        # Fall back to the bare URL when the /health probe returns a falsy
        # result. NOTE(review): assumes assert_http_ok reports failure via a
        # falsy return rather than raising; confirm against demo_core.
        if not demo_core.assert_http_ok(base + "/health", method="GET"):
            demo_core.assert_http_ok(base, method="GET")
    print("\nSaved keys:")
    print(f"  SYNTH_API_KEY={mask_str(synth_key)}")
    print(f"  ENVIRONMENT_API_KEY={mask_str(rl_env_key)}")
    if env.task_app_base_url:
        print(f"  TASK_APP_BASE_URL={env.task_app_base_url}")
    print(f"Configuration persisted to: {USER_CONFIG_PATH}")

    demo_core.persist_demo_dir(os.getcwd())

    print_next_step("deploy our task app", ["uvx synth-ai deploy"])
    return 0
259
+
260
+
261
def register(group):
    """Attach the ``setup`` command to *group*."""

    # No docstring on the command function itself: click would surface it as
    # help text, which the original command does not have.
    @group.command("setup")
    def demo_setup():
        exit_code = setup()
        if not exit_code:
            return
        raise Exit(exit_code)
synth_ai/cli/status.py CHANGED
@@ -12,7 +12,7 @@ from rich.console import Console
12
12
  from rich.panel import Panel
13
13
  from rich.table import Table
14
14
 
15
- from synth_ai.cli._storage import load_storage
15
+ from ._storage import load_storage
16
16
 
17
17
 
18
18
  async def _db_stats(db_url: str) -> dict:
synth_ai/cli/task_app_deploy.py ADDED
@@ -0,0 +1,16 @@
1
+ """Compatibility wrapper for task-app deploy command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+
7
+ from .task_apps import task_app_group
8
+
9
# Look up the "deploy" subcommand registered on task_app_group.
_deploy = task_app_group.commands.get("deploy")

# Fail at import time if the group does not provide the command.
if _deploy is None:
    raise RuntimeError("task_app_group does not define a 'deploy' command")

# Public alias for the group's "deploy" command.
deploy_command: click.Command = _deploy

__all__ = ["deploy_command"]
synth_ai/cli/task_app_list.py ADDED
@@ -0,0 +1,25 @@
1
+ """Task app list command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+ from synth_ai.task.apps import registry
7
+
8
+ from .task_apps import task_app_group
9
+
10
+
11
@task_app_group.command("list")
def list_apps() -> None:
    """List registered task apps."""

    registered = registry.list()
    if not registered:
        click.echo("No task apps registered.")
        return

    # One line per app: "- <app_id> (aliases: a, b): <description>".
    for app in registered:
        if app.aliases:
            alias_suffix = f" (aliases: {', '.join(app.aliases)})"
        else:
            alias_suffix = ""
        click.echo(f"- {app.app_id}{alias_suffix}: {app.description}")
23
+
24
+
25
+ __all__ = ["list_apps"]
synth_ai/cli/task_app_modal_serve.py ADDED
@@ -0,0 +1,16 @@
1
+ """Compatibility wrapper for task-app modal-serve command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+
7
+ from .task_apps import task_app_group
8
+
9
# Look up the "modal-serve" subcommand registered on task_app_group.
_modal_serve = task_app_group.commands.get("modal-serve")

# Fail at import time if the group does not provide the command.
if _modal_serve is None:
    raise RuntimeError("task_app_group does not define a 'modal-serve' command")

# Public alias for the group's "modal-serve" command.
modal_serve_command: click.Command = _modal_serve

__all__ = ["modal_serve_command"]
synth_ai/cli/task_app_serve.py ADDED
@@ -0,0 +1,18 @@
1
+ """Compatibility wrapper for task-app serve command."""
2
+
3
+ from __future__ import annotations
4
+
5
+ import click
6
+
7
+ from .task_apps import serve_command as task_app_serve_command
8
+ from .task_apps import task_app_group
9
+
10
# Re-export the serve command object imported from .task_apps.
serve_command = task_app_serve_command

# Look up the "serve" subcommand registered on task_app_group; fail at import
# time if the group does not provide it.
_group_serve = task_app_group.commands.get("serve")
if _group_serve is None:
    raise RuntimeError("task_app_group does not define a 'serve' command")

# Public alias for the group's "serve" command.
serve_task_group: click.Command = _group_serve

__all__ = ["serve_command", "serve_task_group"]
synth_ai/cli/task_apps.py CHANGED
@@ -24,9 +24,9 @@ import types
24
24
  import uuid
25
25
  from collections.abc import Callable, Iterable, Iterator, Sequence
26
26
  from dataclasses import dataclass
27
- from datetime import datetime, timezone
27
+ from datetime import UTC, datetime
28
28
  from pathlib import Path
29
- from typing import Any, Optional, cast
29
+ from typing import Any, cast
30
30
 
31
31
  try: # Python 3.11+
32
32
  import tomllib as _toml
@@ -92,14 +92,14 @@ except Exception as exc: # pragma: no cover - critical dependency
92
92
  raise RuntimeError("Unable to load task app server utilities") from exc
93
93
 
94
94
 
95
- def _load_demo_directory() -> Optional[Path]:
95
+ def _load_demo_directory() -> Path | None:
96
96
  """Return the demo task apps directory if available."""
97
97
 
98
98
  try:
99
99
  module = cast(
100
100
  Any, importlib.import_module("synth_ai.demos.demo_task_apps.core")
101
101
  )
102
- loader = cast(Callable[[], Optional[str | Path]], module.load_demo_dir)
102
+ loader = cast(Callable[[], str | Path | None], module.load_demo_dir)
103
103
  demo_dir = loader()
104
104
  if isinstance(demo_dir, str | Path):
105
105
  demo_path = Path(demo_dir)
@@ -139,7 +139,7 @@ DEFAULT_SEARCH_RELATIVE = (
139
139
  )
140
140
 
141
141
 
142
- def _pearson(xs: Sequence[float], ys: Sequence[float]) -> Optional[float]:
142
+ def _pearson(xs: Sequence[float], ys: Sequence[float]) -> float | None:
143
143
  if len(xs) != len(ys) or len(xs) < 2:
144
144
  return None
145
145
  mean_x = sum(xs) / len(xs)
@@ -164,7 +164,7 @@ class AppChoice:
164
164
  label: str
165
165
  path: Path
166
166
  source: str
167
- description: Optional[str] = None
167
+ description: str | None = None
168
168
  aliases: tuple[str, ...] = ()
169
169
  entry: TaskAppEntryType | None = None
170
170
  entry_loader: Callable[[], TaskAppEntryType] | None = None
@@ -188,21 +188,21 @@ class JudgeSpec:
188
188
  kwargs: dict[str, Any]
189
189
 
190
190
 
191
- def _parse_datetime_for_trace(value: Any) -> Optional[datetime]:
191
+ def _parse_datetime_for_trace(value: Any) -> datetime | None:
192
192
  if isinstance(value, datetime):
193
- return value if value.tzinfo else value.replace(tzinfo=timezone.utc)
193
+ return value if value.tzinfo else value.replace(tzinfo=UTC)
194
194
  if isinstance(value, str):
195
195
  value = value.replace("Z", "+00:00")
196
196
  try:
197
197
  dt = datetime.fromisoformat(value)
198
198
  except ValueError:
199
199
  try:
200
- dt = datetime.fromtimestamp(float(value), tz=timezone.utc)
200
+ dt = datetime.fromtimestamp(float(value), tz=UTC)
201
201
  except Exception:
202
202
  return None
203
- return dt if dt.tzinfo else dt.replace(tzinfo=timezone.utc)
203
+ return dt if dt.tzinfo else dt.replace(tzinfo=UTC)
204
204
  if isinstance(value, int | float):
205
- return datetime.fromtimestamp(float(value), tz=timezone.utc)
205
+ return datetime.fromtimestamp(float(value), tz=UTC)
206
206
  return None
207
207
 
208
208
 
@@ -241,6 +241,24 @@ def _event_from_dict(payload: dict[str, Any]) -> BaseEvent:
241
241
  system_state_after=payload.get("system_state_after"),
242
242
  **base_kwargs,
243
243
  )
244
+ # Check for LM CAIS event fields
245
+ if any(key in payload for key in ("model_name", "provider", "call_records")):
246
+ from synth_ai.tracing_v3.abstractions import LMCAISEvent
247
+ # Note: call_records are left as dicts - the storage layer will handle serialization
248
+ call_records = payload.get("call_records") or []
249
+ return LMCAISEvent(
250
+ model_name=payload.get("model_name", ""),
251
+ provider=payload.get("provider", ""),
252
+ input_tokens=payload.get("input_tokens"),
253
+ output_tokens=payload.get("output_tokens"),
254
+ total_tokens=payload.get("total_tokens"),
255
+ cost_usd=payload.get("cost_usd"),
256
+ latency_ms=payload.get("latency_ms"),
257
+ span_id=payload.get("span_id"),
258
+ trace_id=payload.get("trace_id"),
259
+ call_records=call_records,
260
+ **base_kwargs,
261
+ )
244
262
  return BaseEvent(**base_kwargs)
245
263
 
246
264
 
@@ -279,7 +297,7 @@ def _step_from_dict(payload: dict[str, Any]) -> SessionTimeStep:
279
297
  for msg in payload.get("markov_blanket_messages", [])
280
298
  if isinstance(msg, dict)
281
299
  ]
282
- timestamp = _parse_datetime_for_trace(payload.get("timestamp")) or datetime.now(timezone.utc)
300
+ timestamp = _parse_datetime_for_trace(payload.get("timestamp")) or datetime.now(UTC)
283
301
  completed_at = _parse_datetime_for_trace(payload.get("completed_at"))
284
302
  return SessionTimeStep(
285
303
  step_id=payload.get("step_id", ""),
@@ -293,7 +311,7 @@ def _step_from_dict(payload: dict[str, Any]) -> SessionTimeStep:
293
311
  )
294
312
 
295
313
 
296
- def _session_trace_from_dict(payload: dict[str, Any]) -> Optional[V3SessionTrace]:
314
+ def _session_trace_from_dict(payload: dict[str, Any]) -> V3SessionTrace | None:
297
315
  if not isinstance(payload, dict):
298
316
  return None
299
317
  steps = [
@@ -311,7 +329,7 @@ def _session_trace_from_dict(payload: dict[str, Any]) -> Optional[V3SessionTrace
311
329
  for msg in payload.get("markov_blanket_message_history", [])
312
330
  if isinstance(msg, dict)
313
331
  ]
314
- created_at = _parse_datetime_for_trace(payload.get("created_at")) or datetime.now(timezone.utc)
332
+ created_at = _parse_datetime_for_trace(payload.get("created_at")) or datetime.now(UTC)
315
333
  metadata = payload.get("metadata") or {}
316
334
  session_metadata = payload.get("session_metadata")
317
335
  return V3SessionTrace(
@@ -341,10 +359,18 @@ async def _store_trace(
341
359
 
342
360
  _logger.info(f"[STORE_TRACE_DEBUG] trace_namespace keys: {list(trace_namespace.keys())}")
343
361
 
362
+ # Handle both formats:
363
+ # - With session_trace key: {"session_trace": {...}}
364
+ # - Without session_trace key (trace itself is the session): {"session_id": ..., "markov_blanket_message_history": ...}
344
365
  session_payload = trace_namespace.get("session_trace")
345
366
  if not isinstance(session_payload, dict):
346
- _logger.warning(f"[STORE_TRACE_DEBUG] No session_trace found or wrong type: {type(session_payload)}")
347
- return
367
+ # If no session_trace key, assume "full" format where trace itself is the session_trace
368
+ if "session_id" in trace_namespace:
369
+ session_payload = trace_namespace
370
+ _logger.info("[STORE_TRACE_DEBUG] Using trace_namespace directly as session_payload (no session_trace key)")
371
+ else:
372
+ _logger.warning(f"[STORE_TRACE_DEBUG] No session_trace found or wrong type: {type(session_payload)}")
373
+ return
348
374
 
349
375
  _logger.info(f"[STORE_TRACE_DEBUG] session_payload keys: {list(session_payload.keys())}")
350
376
  msg_count = len(session_payload.get("markov_blanket_message_history", []))
@@ -352,7 +378,7 @@ async def _store_trace(
352
378
 
353
379
  trace_obj = _session_trace_from_dict(session_payload)
354
380
  if trace_obj is None:
355
- _logger.warning(f"[STORE_TRACE_DEBUG] _session_trace_from_dict returned None")
381
+ _logger.warning("[STORE_TRACE_DEBUG] _session_trace_from_dict returned None")
356
382
  return
357
383
 
358
384
  _logger.info(f"[STORE_TRACE_DEBUG] Created SessionTrace object with {len(trace_obj.markov_blanket_message_history)} messages")
@@ -366,7 +392,7 @@ async def _store_trace(
366
392
 
367
393
  _logger.info(f"[STORE_TRACE_DEBUG] Calling insert_session_trace for session_id={trace_obj.session_id}")
368
394
  await tracer.db.insert_session_trace(trace_obj)
369
- _logger.info(f"[STORE_TRACE_DEBUG] Successfully inserted trace")
395
+ _logger.info("[STORE_TRACE_DEBUG] Successfully inserted trace")
370
396
 
371
397
  def _temporary_sys_path(paths: Sequence[Path]):
372
398
  """Context manager to prepend entries to sys.path temporarily."""
@@ -913,43 +939,43 @@ def _build_modal_config_from_ast(modal_call: ast.Call) -> ModalDeploymentConfigT
913
939
  for kw in modal_call.keywords:
914
940
  if kw.arg and isinstance(kw.value, ast.Constant):
915
941
  kwargs[kw.arg] = kw.value.value
916
- elif kw.arg == "pip_packages" and isinstance(kw.value, (ast.List, ast.Tuple)):
942
+ elif kw.arg == "pip_packages" and isinstance(kw.value, ast.List | ast.Tuple):
917
943
  # Handle pip_packages list/tuple
918
944
  packages: list[str] = []
919
945
  value_node = kw.value
920
- if isinstance(value_node, (ast.List, ast.Tuple)):
946
+ if isinstance(value_node, ast.List | ast.Tuple):
921
947
  for elt in value_node.elts:
922
948
  if isinstance(elt, ast.Constant):
923
949
  packages.append(elt.value)
924
950
  kwargs[kw.arg] = tuple(packages)
925
- elif kw.arg == "extra_local_dirs" and isinstance(kw.value, (ast.List, ast.Tuple)):
951
+ elif kw.arg == "extra_local_dirs" and isinstance(kw.value, ast.List | ast.Tuple):
926
952
  # Handle extra_local_dirs list/tuple of tuples
927
953
  dirs = []
928
954
  value_node = kw.value
929
- if isinstance(value_node, (ast.List, ast.Tuple)):
955
+ if isinstance(value_node, ast.List | ast.Tuple):
930
956
  for elt in value_node.elts:
931
- if isinstance(elt, (ast.List, ast.Tuple)) and len(elt.elts) == 2:
957
+ if isinstance(elt, ast.List | ast.Tuple) and len(elt.elts) == 2:
932
958
  src = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
933
959
  dst = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
934
960
  if src and dst:
935
961
  dirs.append((src, dst))
936
962
  kwargs[kw.arg] = tuple(dirs)
937
- elif kw.arg == "secret_names" and isinstance(kw.value, (ast.List, ast.Tuple)):
963
+ elif kw.arg == "secret_names" and isinstance(kw.value, ast.List | ast.Tuple):
938
964
  # Handle secret_names list/tuple
939
965
  secrets = []
940
966
  value_node = kw.value
941
- if isinstance(value_node, (ast.List, ast.Tuple)):
967
+ if isinstance(value_node, ast.List | ast.Tuple):
942
968
  for elt in value_node.elts:
943
969
  if isinstance(elt, ast.Constant):
944
970
  secrets.append(elt.value)
945
971
  kwargs[kw.arg] = tuple(secrets)
946
- elif kw.arg == "volume_mounts" and isinstance(kw.value, (ast.List, ast.Tuple)):
972
+ elif kw.arg == "volume_mounts" and isinstance(kw.value, ast.List | ast.Tuple):
947
973
  # Handle volume_mounts list/tuple of tuples
948
974
  mounts = []
949
975
  value_node = kw.value
950
- if isinstance(value_node, (ast.List, ast.Tuple)):
976
+ if isinstance(value_node, ast.List | ast.Tuple):
951
977
  for elt in value_node.elts:
952
- if isinstance(elt, (ast.List, ast.Tuple)) and len(elt.elts) == 2:
978
+ if isinstance(elt, ast.List | ast.Tuple) and len(elt.elts) == 2:
953
979
  name = elt.elts[0].value if isinstance(elt.elts[0], ast.Constant) else None
954
980
  mount = elt.elts[1].value if isinstance(elt.elts[1], ast.Constant) else None
955
981
  if name and mount:
@@ -2245,7 +2271,7 @@ def validate_task_app_cmd(
2245
2271
  import time
2246
2272
 
2247
2273
  # Import the validate_task_app function defined in this module
2248
- from synth_ai.cli._validate_task_app import validate_task_app # type: ignore[attr-defined]
2274
+ from ._validate_task_app import validate_task_app # type: ignore[attr-defined]
2249
2275
 
2250
2276
  proc = None
2251
2277
  task_app_url = url
@@ -4442,6 +4468,10 @@ def filter_command(config_path: str) -> None:
4442
4468
  except Exception:
4443
4469
  user_content = content_raw
4444
4470
 
4471
+ # If user_content is a message dict with a 'content' key, extract it
4472
+ if isinstance(user_content, dict) and "content" in user_content:
4473
+ user_content = user_content["content"]
4474
+
4445
4475
  # Extract text from structured content
4446
4476
  def extract_text(content: Any) -> str:
4447
4477
  if isinstance(content, str):
@@ -4472,21 +4502,31 @@ def filter_command(config_path: str) -> None:
4472
4502
 
4473
4503
  # For assistant, we might not have it recorded, so use tool calls as completion
4474
4504
  assistant_text = ""
4505
+ assistant_content = None
4475
4506
  if assistant_msg:
4476
4507
  assistant_content_raw = assistant_msg.get("content")
4477
4508
  try:
4478
4509
  assistant_content = json.loads(assistant_content_raw) if isinstance(assistant_content_raw, str) else assistant_content_raw
4479
4510
  except Exception:
4480
4511
  assistant_content = assistant_content_raw
4512
+
4513
+ # If assistant_content is a message dict with a 'content' key, extract it
4514
+ if isinstance(assistant_content, dict) and "content" in assistant_content:
4515
+ assistant_content = assistant_content["content"]
4516
+
4481
4517
  assistant_text = extract_text(assistant_content)
4482
4518
 
4483
4519
  if not user_text:
4484
4520
  continue
4485
4521
 
4522
+ # Use full multimodal content if it's a list (contains images), otherwise use text
4523
+ user_content_for_message = user_content if isinstance(user_content, list) else user_text
4524
+ assistant_content_for_message = assistant_content if isinstance(assistant_content, list) else (assistant_text if assistant_text else "[no response recorded]")
4525
+
4486
4526
  record = {
4487
4527
  "messages": [
4488
- {"role": "user", "content": user_text},
4489
- {"role": "assistant", "content": assistant_text if assistant_text else "[no response recorded]"},
4528
+ {"role": "user", "content": user_content_for_message},
4529
+ {"role": "assistant", "content": assistant_content_for_message},
4490
4530
  ],
4491
4531
  "metadata": {
4492
4532
  "session_id": session_id,
synth_ai/cli/traces.py CHANGED
@@ -11,7 +11,7 @@ from rich import box
11
11
  from rich.console import Console
12
12
  from rich.table import Table
13
13
 
14
- from synth_ai.cli._storage import load_storage
14
+ from ._storage import load_storage
15
15
 
16
16
 
17
17
  def register(cli):
synth_ai/cli/train.py ADDED
@@ -0,0 +1,18 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import Any
4
+
5
+ from synth_ai.api.train.cli import register as _register
6
+ from synth_ai.api.train.cli import train_command as _train_command
7
+
8
+ __all__ = ["register", "train_command"]
9
+
10
+
11
def register(cli: Any) -> None:
    """Attach the train command to ``cli`` from the legacy module path.

    This module is a compatibility shim: the call is handed straight to
    ``synth_ai.api.train.cli.register`` (imported here as ``_register``).
    Whatever that function returns is discarded.
    """
    _register(cli)
    return None
15
+
16
+
17
def train_command(*args: Any, **kwargs: Any) -> Any:
    """Forward every argument to the relocated train command.

    All positional and keyword arguments are passed unchanged to
    ``synth_ai.api.train.cli.train_command`` (imported here as
    ``_train_command``), and its result is returned as-is.
    """
    outcome = _train_command(*args, **kwargs)
    return outcome