synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293):
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -30,7 +30,7 @@ class SqldDaemon:
30
30
  self.db_path = db_path or CONFIG.sqld_db_path
31
31
  self.http_port = http_port or CONFIG.sqld_http_port
32
32
  self.binary_path = binary_path or self._find_binary()
33
- self.process: subprocess.Popen | None = None
33
+ self.process: subprocess.Popen[str] | None = None
34
34
 
35
35
  def _find_binary(self) -> str:
36
36
  """Find sqld binary in PATH."""
@@ -84,7 +84,7 @@ class SqldDaemon:
84
84
  pass
85
85
 
86
86
  # Check if process crashed
87
- if self.process.poll() is not None:
87
+ if self.process and self.process.poll() is not None:
88
88
  stdout, stderr = self.process.communicate()
89
89
  raise RuntimeError(
90
90
  f"sqld daemon failed to start:\nstdout: {stdout}\nstderr: {stderr}"
@@ -370,8 +370,20 @@ class NativeLibsqlTraceManager(TraceStorage):
370
370
 
371
371
  async def insert_session_trace(self, trace: SessionTrace) -> str:
372
372
  await self.initialize()
373
-
374
- if await self._session_exists(trace.session_id):
373
+
374
+ import logging as _logging
375
+ _logger = _logging.getLogger(__name__)
376
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace START: session_id={trace.session_id}, {len(trace.markov_blanket_message_history)} messages")
377
+
378
+ session_exists = await self._session_exists(trace.session_id)
379
+ _logger.info(f"[TRACE_DEBUG] Session exists: {session_exists}")
380
+
381
+ step_id_map: dict[str, int] = {}
382
+
383
+ if session_exists:
384
+ _logger.warning(f"[TRACE_DEBUG] Session {trace.session_id} already exists, skipping events/timesteps, only updating messages!")
385
+ # Don't return early - we need to save messages!
386
+ # Just update metadata
375
387
  async with self._op_lock:
376
388
  conn = self._conn
377
389
  assert conn is not None
@@ -380,87 +392,102 @@ class NativeLibsqlTraceManager(TraceStorage):
380
392
  (_json_dumps(trace.metadata or {}), trace.session_id),
381
393
  )
382
394
  conn.commit()
383
- return trace.session_id
395
+ # Skip events and timesteps to ensure idempotency
396
+ else:
397
+ created_at = trace.created_at or datetime.now(UTC)
384
398
 
385
- created_at = trace.created_at or datetime.now(UTC)
399
+ async with self._op_lock:
400
+ conn = self._conn
401
+ assert conn is not None
402
+ conn.execute(
403
+ """
404
+ INSERT INTO session_traces (
405
+ session_id,
406
+ created_at,
407
+ num_timesteps,
408
+ num_events,
409
+ num_messages,
410
+ metadata
411
+ )
412
+ VALUES (?, ?, 0, 0, 0, ?)
413
+ """,
414
+ (
415
+ trace.session_id,
416
+ created_at.isoformat(),
417
+ _json_dumps(trace.metadata or {}),
418
+ ),
419
+ )
420
+ conn.commit()
421
+ _logger.info("[TRACE_DEBUG] Session row inserted")
386
422
 
387
- async with self._op_lock:
388
- conn = self._conn
389
- assert conn is not None
390
- conn.execute(
391
- """
392
- INSERT INTO session_traces (
393
- session_id,
394
- created_at,
395
- num_timesteps,
396
- num_events,
397
- num_messages,
398
- metadata
423
+ # Only insert timesteps and events if this is a new session
424
+ for step in trace.session_time_steps:
425
+ step_db_id = await self.ensure_timestep(
426
+ trace.session_id,
427
+ step_id=step.step_id,
428
+ step_index=step.step_index,
429
+ turn_number=step.turn_number,
430
+ started_at=step.timestamp,
431
+ completed_at=step.completed_at,
432
+ metadata=step.step_metadata or {},
399
433
  )
400
- VALUES (?, ?, 0, 0, 0, ?)
401
- """,
402
- (
434
+ step_id_map[step.step_id] = step_db_id
435
+
436
+ for event in trace.event_history:
437
+ step_ref = None
438
+ metadata = event.metadata or {}
439
+ if isinstance(metadata, dict):
440
+ step_ref = metadata.get("step_id")
441
+ timestep_db_id = step_id_map.get(step_ref) if step_ref else None
442
+ await self.insert_event_row(
403
443
  trace.session_id,
404
- created_at.isoformat(),
405
- _json_dumps(trace.metadata or {}),
406
- ),
407
- )
408
- conn.commit()
409
-
410
- step_id_map: dict[str, int] = {}
411
-
412
- for step in trace.session_time_steps:
413
- step_db_id = await self.ensure_timestep(
414
- trace.session_id,
415
- step_id=step.step_id,
416
- step_index=step.step_index,
417
- turn_number=step.turn_number,
418
- started_at=step.timestamp,
419
- completed_at=step.completed_at,
420
- metadata=step.step_metadata or {},
421
- )
422
- step_id_map[step.step_id] = step_db_id
444
+ timestep_db_id=timestep_db_id,
445
+ event=event,
446
+ metadata_override=event.metadata or {},
447
+ )
423
448
 
424
- for event in trace.event_history:
425
- step_ref = None
426
- metadata = event.metadata or {}
427
- if isinstance(metadata, dict):
449
+ import logging as _logging
450
+ _logger = _logging.getLogger(__name__)
451
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace: saving {len(trace.markov_blanket_message_history)} messages (session_exists={session_exists})")
452
+
453
+ # Only insert messages if this is a new session (for idempotency)
454
+ if not session_exists:
455
+ for idx, msg in enumerate(trace.markov_blanket_message_history):
456
+ metadata = dict(getattr(msg, "metadata", {}) or {})
428
457
  step_ref = metadata.get("step_id")
429
- timestep_db_id = step_id_map.get(step_ref) if step_ref else None
430
- await self.insert_event_row(
431
- trace.session_id,
432
- timestep_db_id=timestep_db_id,
433
- event=event,
434
- metadata_override=event.metadata or {},
435
- )
436
-
437
- for msg in trace.markov_blanket_message_history:
438
- metadata = dict(getattr(msg, "metadata", {}) or {})
439
- step_ref = metadata.get("step_id")
440
- content_value = msg.content
441
- if isinstance(msg.content, SessionMessageContent):
442
- if msg.content.json_payload:
443
- metadata.setdefault("json_payload", msg.content.json_payload)
444
- content_value = msg.content.json_payload
445
- else:
446
- content_value = msg.content.as_text()
447
- if msg.content.text:
448
- metadata.setdefault("text", msg.content.text)
449
- elif not isinstance(content_value, str):
458
+ content_value = msg.content
459
+ if isinstance(msg.content, SessionMessageContent):
460
+ if msg.content.json_payload:
461
+ metadata.setdefault("json_payload", msg.content.json_payload)
462
+ content_value = msg.content.json_payload
463
+ else:
464
+ content_value = msg.content.as_text()
465
+ if msg.content.text:
466
+ metadata.setdefault("text", msg.content.text)
467
+ elif not isinstance(content_value, str):
468
+ try:
469
+ content_value = json.dumps(content_value, ensure_ascii=False)
470
+ except (TypeError, ValueError):
471
+ content_value = str(content_value)
472
+
473
+ _logger.info(f"[TRACE_DEBUG] Message {idx+1}: type={msg.message_type}, content_len={len(str(content_value))}")
474
+
450
475
  try:
451
- content_value = json.dumps(content_value, ensure_ascii=False)
452
- except (TypeError, ValueError):
453
- content_value = str(content_value)
454
-
455
- await self.insert_message_row(
456
- trace.session_id,
457
- timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
458
- message_type=msg.message_type,
459
- content=content_value,
460
- event_time=msg.time_record.event_time,
461
- message_time=msg.time_record.message_time,
462
- metadata=metadata,
463
- )
476
+ await self.insert_message_row(
477
+ trace.session_id,
478
+ timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
479
+ message_type=msg.message_type,
480
+ content=content_value,
481
+ event_time=msg.time_record.event_time,
482
+ message_time=msg.time_record.message_time,
483
+ metadata=metadata,
484
+ )
485
+ _logger.info(f"[TRACE_DEBUG] Message {idx+1}: saved successfully")
486
+ except Exception as exc:
487
+ _logger.error(f"[TRACE_DEBUG] Message {idx+1}: FAILED TO SAVE: {exc}", exc_info=True)
488
+ raise
489
+ else:
490
+ _logger.info("[TRACE_DEBUG] Skipping message insertion for existing session (idempotency)")
464
491
 
465
492
  async with self._op_lock:
466
493
  conn = self._conn
@@ -904,7 +931,11 @@ class NativeLibsqlTraceManager(TraceStorage):
904
931
  if isinstance(event, LMCAISEvent):
905
932
  call_records = None
906
933
  if getattr(event, "call_records", None):
907
- call_records = [asdict(record) for record in event.call_records]
934
+ # Handle both dataclass instances and dicts (from deserialization)
935
+ call_records = [
936
+ asdict(record) if not isinstance(record, dict) else record
937
+ for record in event.call_records
938
+ ]
908
939
  payload.update(
909
940
  {
910
941
  "event_type": "cais",
@@ -10,7 +10,7 @@ from typing import Any
10
10
 
11
11
 
12
12
  def iso_now() -> str:
13
- """Get current UTC time as ISO format string."""
13
+ """Get current timezone.utc time as ISO format string."""
14
14
  return datetime.now(UTC).isoformat()
15
15
 
16
16
 
@@ -0,0 +1,5 @@
1
+ """Text User Interface utilities for synth-ai."""
2
+
3
+ from .dashboard import main
4
+
5
+ __all__ = ["main"]
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Entry point for Synth AI TUI dashboard.
4
+
5
+ Usage:
6
+ python -m synth_ai.tui
7
+ python -m synth_ai.tui --url sqlite+aiosqlite:///path/to/db
8
+ """
9
+
10
+ from .dashboard import main
11
+
12
+ if __name__ == "__main__":
13
+ main()
@@ -0,0 +1 @@
1
+ """Command Line Interface tools for synth-ai."""
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Query experiments and sessions from Turso/sqld using v3 tracing.
4
+ """
5
+
6
+ import argparse
7
+ import asyncio
8
+
9
+ from synth_ai.tracing_v3.turso import NativeLibsqlTraceManager
10
+
11
+
12
async def list_experiments(db_url: str):
    """Print a one-block summary for every experiment found at ``db_url``.

    Opens a ``NativeLibsqlTraceManager`` on ``db_url``, runs a single
    aggregate query over ``experiments`` / ``session_traces`` / ``events``
    and prints, per experiment, its name, truncated ID, creation time,
    description, session count and event count.  Cost and token totals are
    printed only when they are non-zero.  The manager is always closed,
    even when the query or the printing fails.

    Args:
        db_url: Database URL passed straight to ``NativeLibsqlTraceManager``.
    """
    manager = NativeLibsqlTraceManager(db_url)
    await manager.initialize()

    # Only 'cais' events contribute to cost/tokens.
    # NOTE(review): the division by 100.0 suggests cost_usd is stored in
    # cents -- confirm against the schema.
    summary_sql = """
        SELECT
            e.experiment_id,
            e.name,
            e.description,
            e.created_at,
            COUNT(DISTINCT st.session_id) as num_sessions,
            COUNT(DISTINCT ev.id) as num_events,
            SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
            SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
        FROM experiments e
        LEFT JOIN session_traces st ON e.experiment_id = st.experiment_id
        LEFT JOIN events ev ON st.session_id = ev.session_id
        GROUP BY e.experiment_id, e.name, e.description, e.created_at
        ORDER BY e.created_at DESC
    """

    try:
        experiments = await manager.query_traces(summary_sql)

        if experiments.empty:
            print("No experiments found in database.")
            return

        rule = "=" * 100
        print(f"\n{rule}")
        print(f"{'Experiments in ' + db_url:^100}")
        print(f"{rule}\n")

        for _, record in experiments.iterrows():
            short_id = record["experiment_id"][:8]
            print(f"🧪 {record['name']} (id: {short_id}...)")
            print(f" Created: {record['created_at']}")
            print(f" Description: {record['description']}")
            print(f" Sessions: {record['num_sessions']}")
            print(f" Events: {record['num_events']:,}")

            # Skip the optional lines when the total is missing or zero.
            cost = record["total_cost"]
            if cost and cost > 0:
                print(f" Cost: ${cost:.4f}")
            tokens = record["total_tokens"]
            if tokens and tokens > 0:
                print(f" Tokens: {int(tokens):,}")
            print()
    finally:
        await manager.close()
56
+
57
+
58
async def show_experiment_details(db_url: str, experiment_id: str):
    """Print details for the first experiment whose ID starts with ``experiment_id``.

    Looks the experiment up by ID prefix, then prints its session count,
    aggregated event / message / cost / token totals and a per-session
    listing.  The manager is always closed on exit.

    Args:
        db_url: Database URL passed straight to ``NativeLibsqlTraceManager``.
        experiment_id: Full or partial (prefix) experiment ID.  It is used
            inside a ``LIKE`` pattern, so ``%`` and ``_`` in the value act
            as wildcards; when several experiments match, only the first
            returned row is shown.
    """
    db = NativeLibsqlTraceManager(db_url)
    await db.initialize()

    try:
        # Prefix match so callers can pass an abbreviated ID.
        exp_df = await db.query_traces(
            """
            SELECT * FROM experiments WHERE experiment_id LIKE :exp_id
            """,
            {"exp_id": f"{experiment_id}%"},
        )

        if exp_df.empty:
            print(f"No experiment found matching ID: {experiment_id}")
            return

        exp = exp_df.iloc[0]
        print(f"\n{'=' * 100}")
        print(f"Experiment: {exp['name']} ({exp['experiment_id']})")
        print(f"{'=' * 100}\n")

        # Get session statistics
        sessions_df = await db.get_sessions_by_experiment(exp["experiment_id"])

        if sessions_df:
            print(f"Sessions: {len(sessions_df)}")

            # Aggregate events and messages per session *before* joining.
            # Joining both child tables directly to session_traces yields an
            # events x messages cross product for every session, which
            # multiplies the SUM()-based cost and token totals by the number
            # of messages in that session.  COALESCE keeps the totals numeric
            # when a SUM has nothing to add up.
            # NOTE(review): the division by 100.0 suggests cost_usd is stored
            # in cents -- confirm against the schema.
            stats_df = await db.query_traces(
                """
                SELECT
                    COALESCE(SUM(ev.num_events), 0) as total_events,
                    COALESCE(SUM(msg.num_messages), 0) as total_messages,
                    COALESCE(SUM(ev.cost), 0) / 100.0 as total_cost,
                    COALESCE(SUM(ev.tokens), 0) as total_tokens
                FROM session_traces st
                LEFT JOIN (
                    SELECT
                        session_id,
                        COUNT(id) as num_events,
                        SUM(CASE WHEN event_type = 'cais' THEN cost_usd ELSE 0 END) as cost,
                        SUM(CASE WHEN event_type = 'cais' THEN total_tokens ELSE 0 END) as tokens
                    FROM events
                    GROUP BY session_id
                ) ev ON st.session_id = ev.session_id
                LEFT JOIN (
                    SELECT session_id, COUNT(id) as num_messages
                    FROM messages
                    GROUP BY session_id
                ) msg ON st.session_id = msg.session_id
                WHERE st.experiment_id = :exp_id
                """,
                {"exp_id": exp["experiment_id"]},
            )

            if not stats_df.empty:
                stats = stats_df.iloc[0]
                print(f"Total events: {int(stats['total_events']):,}")
                print(f"Total messages: {int(stats['total_messages']):,}")
                print(f"Total cost: ${stats['total_cost']:.4f}")
                print(f"Total tokens: {int(stats['total_tokens']):,}")

            # Show session list
            print("\nSession list:")
            for sess in sessions_df:
                print(f" - {sess['session_id']} ({sess['created_at']})")
                print(
                    f" Timesteps: {sess['num_timesteps']}, Events: {sess['num_events']}, Messages: {sess['num_messages']}"
                )
    finally:
        await db.close()
119
+
120
+
121
async def show_model_usage(db_url: str, model_name: str | None = None):
    """Print the model-usage table stored at ``db_url``.

    Fetches usage via the manager's ``get_model_usage`` and prints it under
    a centred banner, or prints a notice when the result is empty.  The
    manager is always closed on exit.

    Args:
        db_url: Database URL passed straight to ``NativeLibsqlTraceManager``.
        model_name: Forwarded as ``model_name`` to ``get_model_usage``;
            ``None`` is passed through unchanged.
    """
    manager = NativeLibsqlTraceManager(db_url)
    await manager.initialize()

    try:
        usage = await manager.get_model_usage(model_name=model_name)

        if not usage.empty:
            rule = "=" * 100
            print(f"\n{rule}")
            print(f"{'Model Usage Statistics':^100}")
            print(f"{rule}\n")

            # Render the whole frame without the index column.
            print(usage.to_string(index=False))
        else:
            print("No model usage data found.")
    finally:
        await manager.close()
140
+
141
+
142
async def main():
    """Parse command-line options and run the matching report.

    Precedence: ``--usage`` or ``--model`` selects the model-usage report;
    otherwise ``--experiment`` selects the single-experiment report;
    otherwise all experiments are listed.
    """
    cli = argparse.ArgumentParser(description="Query experiments from Turso/sqld (v3)")
    cli.add_argument(
        "-u",
        "--url",
        default="sqlite+libsql://http://127.0.0.1:8080",
        help="Turso database URL",
    )
    cli.add_argument(
        "-e",
        "--experiment",
        help="Show details for specific experiment ID (can be partial)",
    )
    cli.add_argument("-m", "--model", help="Show usage for specific model")
    cli.add_argument("--usage", action="store_true", help="Show model usage statistics")

    opts = cli.parse_args()

    # Model usage wins over --experiment when both are supplied.
    if opts.usage or opts.model:
        await show_model_usage(opts.url, opts.model)
        return

    if opts.experiment:
        await show_experiment_details(opts.url, opts.experiment)
        return

    await list_experiments(opts.url)
161
+
162
+
163
+ if __name__ == "__main__":
164
+ asyncio.run(main())
@@ -0,0 +1,164 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Query experiments and sessions from Turso/sqld using v3 tracing.
4
+ """
5
+
6
+ import argparse
7
+ import asyncio
8
+
9
+ from synth_ai.tracing_v3.turso import NativeLibsqlTraceManager
10
+
11
+
12
async def list_experiments(db_url: str):
    """Print one summary entry per experiment, newest first.

    For every row of ``experiments`` the query counts distinct sessions and
    events and sums cost and tokens over events whose ``event_type`` is
    ``'cais'``. The summed ``cost_usd`` is divided by 100 before display.

    Args:
        db_url: Connection URL passed to ``NativeLibsqlTraceManager``; it is
            also echoed in the report banner.
    """
    summary_sql = """
        SELECT
            e.experiment_id,
            e.name,
            e.description,
            e.created_at,
            COUNT(DISTINCT st.session_id) as num_sessions,
            COUNT(DISTINCT ev.id) as num_events,
            SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
            SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
        FROM experiments e
        LEFT JOIN session_traces st ON e.experiment_id = st.experiment_id
        LEFT JOIN events ev ON st.session_id = ev.session_id
        GROUP BY e.experiment_id, e.name, e.description, e.created_at
        ORDER BY e.created_at DESC
        """

    manager = NativeLibsqlTraceManager(db_url)
    await manager.initialize()

    try:
        experiments = await manager.query_traces(summary_sql)

        if experiments.empty:
            print("No experiments found in database.")
            return

        rule = "=" * 100
        print(f"\n{rule}")
        print(f"{'Experiments in ' + db_url:^100}")
        print(f"{rule}\n")

        for _, record in experiments.iterrows():
            # Only the first 8 characters of the ID are shown.
            short_id = record["experiment_id"][:8]
            print(f"🧪 {record['name']} (id: {short_id}...)")
            print(f" Created: {record['created_at']}")
            print(f" Description: {record['description']}")
            print(f" Sessions: {record['num_sessions']}")
            print(f" Events: {record['num_events']:,}")

            # Cost and token lines are omitted when the value is missing or not positive.
            cost = record["total_cost"]
            if cost and cost > 0:
                print(f" Cost: ${cost:.4f}")

            tokens = record["total_tokens"]
            if tokens and tokens > 0:
                print(f" Tokens: {int(tokens):,}")

            print()
    finally:
        await manager.close()
56
+
57
+
58
async def show_experiment_details(db_url: str, experiment_id: str):
    """Print a summary of one experiment and the sessions recorded under it.

    Args:
        db_url: Connection URL passed to ``NativeLibsqlTraceManager``.
        experiment_id: Full experiment ID or a leading fragment of it. It is
            matched with ``LIKE '<value>%'``; if several experiments match,
            the first returned row is used.

    Nothing beyond the header is printed when the experiment has no sessions.
    The manager is closed on every path once it has been initialized.
    """
    db = NativeLibsqlTraceManager(db_url)
    await db.initialize()

    try:
        # Prefix match so a shortened ID can be passed on the command line.
        exp_df = await db.query_traces(
            """
            SELECT * FROM experiments WHERE experiment_id LIKE :exp_id
            """,
            {"exp_id": f"{experiment_id}%"},
        )

        if exp_df.empty:
            print(f"No experiment found matching ID: {experiment_id}")
            return

        exp = exp_df.iloc[0]
        print(f"\n{'=' * 100}")
        print(f"Experiment: {exp['name']} ({exp['experiment_id']})")
        print(f"{'=' * 100}\n")

        # NOTE(review): the result is iterated below as a sequence of
        # mapping-like rows (sess['session_id'], ...), so it is treated as
        # such here rather than as a DataFrame -- confirm against the manager.
        sessions = await db.get_sessions_by_experiment(exp["experiment_id"])
        if not sessions:
            return

        print(f"Sessions: {len(sessions)}")

        # Events and messages are aggregated per session *before* joining.
        # Joining both raw tables on session_id would repeat each event once
        # per message in the same session and inflate the cost/token sums.
        # COALESCE keeps the totals numeric when there is nothing to sum.
        # The / 100.0 scaling of cost_usd is kept as in the other queries.
        stats_df = await db.query_traces(
            """
            SELECT
                COALESCE(SUM(ev.num_events), 0) as total_events,
                COALESCE(SUM(m.num_messages), 0) as total_messages,
                COALESCE(SUM(ev.cost_sum), 0) / 100.0 as total_cost,
                COALESCE(SUM(ev.token_sum), 0) as total_tokens
            FROM session_traces st
            LEFT JOIN (
                SELECT
                    session_id,
                    COUNT(DISTINCT id) as num_events,
                    SUM(CASE WHEN event_type = 'cais' THEN cost_usd ELSE 0 END) as cost_sum,
                    SUM(CASE WHEN event_type = 'cais' THEN total_tokens ELSE 0 END) as token_sum
                FROM events
                GROUP BY session_id
            ) ev ON st.session_id = ev.session_id
            LEFT JOIN (
                SELECT
                    session_id,
                    COUNT(DISTINCT id) as num_messages
                FROM messages
                GROUP BY session_id
            ) m ON st.session_id = m.session_id
            WHERE st.experiment_id = :exp_id
            """,
            {"exp_id": exp["experiment_id"]},
        )

        if not stats_df.empty:
            stats = stats_df.iloc[0]
            print(f"Total events: {int(stats['total_events']):,}")
            print(f"Total messages: {int(stats['total_messages']):,}")
            print(f"Total cost: ${float(stats['total_cost']):.4f}")
            print(f"Total tokens: {int(stats['total_tokens']):,}")

        # Show session list
        print("\nSession list:")
        for sess in sessions:
            print(f" - {sess['session_id']} ({sess['created_at']})")
            print(
                f" Timesteps: {sess['num_timesteps']}, Events: {sess['num_events']}, Messages: {sess['num_messages']}"
            )
    finally:
        await db.close()
119
+
120
+
121
async def show_model_usage(db_url: str, model_name: str | None = None):
    """Print the model-usage table returned by the trace manager.

    Args:
        db_url: Connection URL passed to ``NativeLibsqlTraceManager``.
        model_name: Optional model name forwarded to ``get_model_usage``;
            ``None`` is passed through unchanged.

    The manager is closed in all cases once it has been initialized.
    """
    manager = NativeLibsqlTraceManager(db_url)
    await manager.initialize()

    try:
        usage = await manager.get_model_usage(model_name=model_name)

        if not usage.empty:
            # Banner: a 100-column rule above and below a centred title.
            rule = "=" * 100
            print(f"\n{rule}")
            print(f"{'Model Usage Statistics':^100}")
            print(f"{rule}\n")

            print(usage.to_string(index=False))
        else:
            print("No model usage data found.")
    finally:
        await manager.close()
140
+
141
+
142
async def main():
    """Parse command-line options and run the matching report.

    Precedence: ``--usage`` or ``--model`` selects the model-usage report,
    otherwise ``--experiment`` selects the single-experiment report, otherwise
    all experiments are listed.
    """
    cli = argparse.ArgumentParser(description="Query experiments from Turso/sqld (v3)")
    cli.add_argument(
        "-u",
        "--url",
        default="sqlite+libsql://http://127.0.0.1:8080",
        help="Turso database URL",
    )
    cli.add_argument(
        "-e",
        "--experiment",
        help="Show details for specific experiment ID (can be partial)",
    )
    cli.add_argument("-m", "--model", help="Show usage for specific model")
    cli.add_argument("--usage", action="store_true", help="Show model usage statistics")

    opts = cli.parse_args()

    # Guard-clause dispatch; the first matching mode wins.
    if opts.usage or opts.model:
        await show_model_usage(opts.url, opts.model)
        return

    if opts.experiment:
        await show_experiment_details(opts.url, opts.experiment)
        return

    await list_experiments(opts.url)


# Run the CLI only when this file is executed directly as a script.
if __name__ == "__main__":
    asyncio.run(main())