synth-ai 0.2.13.dev1__py3-none-any.whl → 0.2.14__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (291) hide show
  1. examples/multi_step/configs/README_verilog_rl.md +77 -0
  2. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  3. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  4. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  5. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  6. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +17 -5
  7. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  8. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  9. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  10. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  11. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  12. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  13. examples/multi_step/readme.md +48 -0
  14. examples/multi_step/verilog_rl_lora.md +218 -0
  15. examples/qwen_coder/configs/coder_lora_30b.toml +1 -1
  16. examples/sft/evaluate.py +2 -0
  17. examples/sft/generate_traces.py +2 -0
  18. examples/swe/task_app/grpo_swe_mini.py +56 -26
  19. examples/swe/task_app/hosted/rollout.py +42 -0
  20. examples/swe/task_app/hosted/test_service.py +5 -6
  21. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  22. examples/task_apps/TESTING.md +275 -0
  23. examples/task_apps/__init__.py +0 -0
  24. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  25. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  26. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  27. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  28. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  29. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  30. examples/task_apps/crafter/__init__.py +0 -0
  31. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  32. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  33. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  34. examples/task_apps/crafter/task_app/__init__.py +5 -0
  35. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter.py +324 -21
  36. examples/{warming_up_to_rl → task_apps/crafter}/task_app/grpo_crafter_task_app.py +1 -1
  37. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  38. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/policy.py +76 -7
  39. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  40. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/openai_client.py +25 -3
  41. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/policy_routes.py +77 -4
  42. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/rollout.py +117 -9
  43. examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_service.py +5 -6
  44. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +218 -0
  45. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  46. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  47. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  48. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  49. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  50. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  51. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  52. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  53. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  54. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  55. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  56. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  57. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  58. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  59. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  60. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  61. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  62. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  63. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  64. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  65. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/__init__.py +0 -0
  66. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  67. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  68. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  69. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  70. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/__init__.py +0 -0
  71. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  72. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  73. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  74. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  75. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  76. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  77. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  78. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  79. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  80. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  81. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  82. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  83. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  84. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  85. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/__init__.py +0 -0
  86. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  87. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  88. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  89. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  90. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  91. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  92. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  93. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  94. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  95. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  96. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  97. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  98. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  99. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  100. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  101. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  102. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  103. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  104. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  105. examples/task_apps/enron/__init__.py +1 -0
  106. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  107. examples/task_apps/enron/filter_sft.toml +5 -0
  108. examples/task_apps/enron/task_app/README.md +14 -0
  109. examples/task_apps/enron/task_app/__init__.py +1 -0
  110. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  111. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  112. examples/task_apps/enron/tests/__init__.py +4 -0
  113. examples/task_apps/enron/tests/conftest.py +115 -0
  114. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  115. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  116. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  117. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  118. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  119. examples/task_apps/math/__init__.py +0 -0
  120. examples/{rl/task_app → task_apps/math}/math_single_step.py +19 -10
  121. examples/task_apps/pokemon_battle/__init__.py +2 -0
  122. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  123. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  124. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  125. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  126. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  127. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  128. examples/task_apps/pokemon_red/README.md +357 -0
  129. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  130. examples/task_apps/pokemon_red/__init__.py +3 -0
  131. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  132. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +225 -0
  133. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  134. examples/task_apps/pokemon_red/task_app.py +799 -0
  135. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  136. examples/task_apps/sokoban/README.md +307 -0
  137. examples/task_apps/sokoban/__init__.py +3 -0
  138. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  139. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  140. examples/task_apps/sokoban/filter_sft.toml +5 -0
  141. examples/task_apps/sokoban/task_app.py +1058 -0
  142. examples/task_apps/sokoban/tests/__init__.py +4 -0
  143. examples/task_apps/sokoban/tests/conftest.py +113 -0
  144. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  145. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  146. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  147. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  148. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  149. examples/task_apps/verilog/__init__.py +1 -0
  150. examples/task_apps/verilog/eval_groq_qwen32b.toml +24 -0
  151. examples/task_apps/verilog/filter_sft.toml +5 -0
  152. examples/task_apps/verilog/task_app/README.md +12 -0
  153. examples/task_apps/verilog/task_app/__init__.py +1 -0
  154. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  155. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  156. examples/task_apps/verilog/tests/__init__.py +4 -0
  157. examples/task_apps/verilog/tests/conftest.py +115 -0
  158. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  159. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  160. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  161. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  162. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  163. examples/vlm/crafter_openai_vlm_agent.py +4 -4
  164. examples/vlm/run_crafter_vlm_benchmark.py +4 -4
  165. examples/warming_up_to_rl/groq_test.py +2 -0
  166. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  167. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  168. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  169. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  170. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  171. examples/workflows/__init__.py +0 -0
  172. examples/workflows/math_rl/__init__.py +0 -0
  173. examples/workflows/math_rl/download_dataset.py +80 -0
  174. synth_ai/__init__.py +2 -2
  175. synth_ai/api/models/supported.py +1 -0
  176. synth_ai/api/train/builders.py +25 -11
  177. synth_ai/api/train/cli.py +12 -6
  178. synth_ai/api/train/configs/__init__.py +10 -10
  179. synth_ai/api/train/configs/rl.py +5 -4
  180. synth_ai/api/train/configs/sft.py +4 -3
  181. synth_ai/api/train/env_resolver.py +5 -2
  182. synth_ai/api/train/supported_algos.py +10 -5
  183. synth_ai/api/train/utils.py +7 -4
  184. synth_ai/cli/__init__.py +48 -59
  185. synth_ai/cli/_modal_wrapper.py +3 -2
  186. synth_ai/cli/_storage.py +4 -3
  187. synth_ai/cli/_validate_task_app.py +11 -0
  188. synth_ai/cli/balance.py +4 -3
  189. synth_ai/cli/calc.py +2 -2
  190. synth_ai/cli/demo.py +14 -7
  191. synth_ai/cli/legacy_root_backup.py +1 -1
  192. synth_ai/cli/recent.py +1 -1
  193. synth_ai/cli/rl_demo.py +8 -7
  194. synth_ai/cli/root.py +0 -97
  195. synth_ai/cli/status.py +1 -1
  196. synth_ai/cli/task_apps.py +1922 -190
  197. synth_ai/cli/traces.py +1 -1
  198. synth_ai/cli/tui.py +57 -0
  199. synth_ai/cli/turso.py +1 -1
  200. synth_ai/cli/watch.py +1 -1
  201. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +29 -17
  202. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  203. synth_ai/environments/examples/enron/engine.py +7 -2
  204. synth_ai/environments/examples/enron/environment.py +68 -0
  205. synth_ai/environments/examples/red/engine.py +27 -0
  206. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  207. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  208. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  209. synth_ai/environments/examples/red/environment.py +60 -0
  210. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  211. synth_ai/environments/examples/verilog/engine.py +104 -12
  212. synth_ai/evals/client.py +58 -61
  213. synth_ai/jobs/client.py +16 -4
  214. synth_ai/judge_schemas.py +9 -9
  215. synth_ai/py.typed +0 -0
  216. synth_ai/task/__init__.py +24 -5
  217. synth_ai/task/apps/__init__.py +1 -0
  218. synth_ai/task/config.py +257 -0
  219. synth_ai/task/contracts.py +138 -39
  220. synth_ai/task/proxy.py +48 -56
  221. synth_ai/task/rubrics/__init__.py +56 -0
  222. synth_ai/task/rubrics/loaders.py +152 -0
  223. synth_ai/task/rubrics/models.py +57 -0
  224. synth_ai/task/rubrics/scoring.py +116 -0
  225. synth_ai/{rubrics/validators.py → task/rubrics/strict.py} +53 -30
  226. synth_ai/task/server.py +8 -7
  227. synth_ai/task/trace_correlation_helpers.py +315 -0
  228. synth_ai/task/validators.py +413 -6
  229. synth_ai/tracing_v3/abstractions.py +3 -3
  230. synth_ai/tracing_v3/decorators.py +7 -3
  231. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  232. synth_ai/tracing_v3/replica_sync.py +4 -4
  233. synth_ai/tracing_v3/serialization.py +5 -5
  234. synth_ai/tracing_v3/session_tracer.py +16 -6
  235. synth_ai/tracing_v3/storage/base.py +29 -29
  236. synth_ai/tracing_v3/storage/config.py +3 -3
  237. synth_ai/tracing_v3/trace_utils.py +317 -0
  238. synth_ai/tracing_v3/turso/daemon.py +8 -7
  239. synth_ai/tracing_v3/turso/native_manager.py +66 -43
  240. synth_ai/tracing_v3/utils.py +3 -3
  241. synth_ai/tui/__init__.py +5 -0
  242. synth_ai/tui/__main__.py +13 -0
  243. synth_ai/tui/cli/__init__.py +1 -0
  244. synth_ai/tui/cli/query_experiments.py +164 -0
  245. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  246. synth_ai/tui/dashboard.py +906 -0
  247. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/METADATA +4 -1
  248. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/RECORD +278 -126
  249. examples/agora_ex/README_MoE.md +0 -224
  250. examples/agora_ex/__init__.py +0 -7
  251. examples/agora_ex/agora_ex.py +0 -65
  252. examples/agora_ex/agora_ex_task_app.py +0 -590
  253. examples/agora_ex/configs/rl_lora_qwen3_moe_2xh200.toml +0 -121
  254. examples/agora_ex/reward_fn_grpo-human.py +0 -129
  255. examples/agora_ex/system_prompt_CURRENT.md +0 -63
  256. examples/agora_ex/task_app/agora_ex_task_app.py +0 -590
  257. examples/agora_ex/task_app/reward_fn_grpo-human.py +0 -129
  258. examples/agora_ex/task_app/system_prompt_CURRENT.md +0 -63
  259. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +0 -62
  260. synth_ai/rubrics/__init__.py +0 -22
  261. synth_ai/task/rubrics.py +0 -219
  262. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/README.md +0 -0
  263. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/README.md +0 -0
  264. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/__init__.py +0 -0
  265. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/branching.py +0 -0
  266. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/environment_routes.py +0 -0
  267. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/__init__.py +0 -0
  268. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/__init__.py +0 -0
  269. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/app.py +0 -0
  270. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/shared.py +0 -0
  271. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/envs/crafter/tools.py +0 -0
  272. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/hosted_app.py +0 -0
  273. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/inference/__init__.py +0 -0
  274. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/main.py +0 -0
  275. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/registry.py +0 -0
  276. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/__init__.py +0 -0
  277. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/storage/volume.py +0 -0
  278. /examples/{warming_up_to_rl → task_apps/crafter}/task_app/synth_envs_hosted/test_agents.py +0 -0
  279. /examples/{rl/task_app → task_apps/math}/README.md +0 -0
  280. /examples/{rl/task_app → task_apps/math}/math_task_app.py +0 -0
  281. /examples/{rl → workflows/math_rl}/configs/eval_base_qwen.toml +0 -0
  282. /examples/{rl → workflows/math_rl}/configs/eval_rl_qwen.toml +0 -0
  283. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen.toml +0 -0
  284. /examples/{rl → workflows/math_rl}/configs/rl_from_base_qwen17.toml +0 -0
  285. /examples/{rl → workflows/math_rl}/configs/rl_from_ft_qwen.toml +0 -0
  286. /examples/{rl → workflows/math_rl}/run_eval.py +0 -0
  287. /examples/{rl → workflows/math_rl}/run_rl_and_save.py +0 -0
  288. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/WHEEL +0 -0
  289. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/entry_points.txt +0 -0
  290. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/licenses/LICENSE +0 -0
  291. {synth_ai-0.2.13.dev1.dist-info → synth_ai-0.2.14.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,906 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Interactive TUI Dashboard for Synth AI experiments.
4
+
5
+ Launch with: python -m synth_ai.tui.dashboard
6
+ """
7
+
8
+ import logging
9
+ import os
10
+ from datetime import datetime
11
+ from urllib.parse import urlparse
12
+
13
+ # Import textual components with graceful fallback
14
+ try:
15
+ from textual import on
16
+ from textual.app import App, ComposeResult
17
+ from textual.binding import Binding
18
+ from textual.containers import Container
19
+ from textual.reactive import reactive
20
+ from textual.timer import Timer
21
+ from textual.widgets import (
22
+ DataTable,
23
+ Footer,
24
+ Header,
25
+ Static,
26
+ )
27
+ _TEXTUAL_AVAILABLE = True
28
+ except (ImportError, ModuleNotFoundError):
29
+ # Textual not available - provide dummy classes for type checking
30
+ on = None # type: ignore
31
+ App = object # type: ignore
32
+ ComposeResult = object # type: ignore
33
+ Binding = object # type: ignore
34
+ Container = object # type: ignore
35
+ reactive = lambda x: x # type: ignore
36
+ Timer = object # type: ignore
37
+ DataTable = object # type: ignore
38
+ Footer = object # type: ignore
39
+ Header = object # type: ignore
40
+ Static = object # type: ignore
41
+ _TEXTUAL_AVAILABLE = False
42
+
43
+ # Import database manager with graceful fallback
44
+ try:
45
+ from synth_ai.tracing_v3.turso.native_manager import NativeLibsqlTraceManager # type: ignore[import-untyped]
46
+ _DB_AVAILABLE = True
47
+ except (ImportError, ModuleNotFoundError, TypeError):
48
+ # Database manager not available - provide dummy class
49
+ NativeLibsqlTraceManager = object # type: ignore
50
+ _DB_AVAILABLE = False
51
+
52
+ import asyncio
53
+ import requests
54
+ from datetime import timedelta
55
+
56
+
57
class ExperimentRow:
    """Aggregate statistics for one experiment, ready for table display.

    The attributes mirror the constructor arguments. ``name`` falls back to
    ``"Unnamed"`` and ``description`` to ``""`` when they are empty or
    ``None``. ``created_at`` is stored as a ``datetime`` whenever one is
    passed or the value is an ISO-8601 string; any other value is stored
    unchanged so nothing is lost.
    """

    def __init__(
        self,
        exp_id: str,
        name: str,
        description: str,
        created_at: "datetime | str | None",
        sessions: int,
        events: int,
        messages: int,
        cost: float,
        tokens: int,
    ):
        self.exp_id = exp_id
        self.name = name or "Unnamed"
        self.description = description or ""
        # NOTE(review): callers pass this straight from a query result row, so
        # it may presumably arrive as text rather than a datetime - normalise it.
        self.created_at = self._coerce_datetime(created_at)
        self.sessions = sessions
        self.events = events
        self.messages = messages
        self.cost = cost
        self.tokens = tokens

    @staticmethod
    def _coerce_datetime(value):
        """Return *value* as a ``datetime`` if it is one or parses as ISO-8601.

        Values that cannot be parsed are returned unchanged.
        """
        if isinstance(value, datetime):
            return value
        if isinstance(value, str):
            text = value.strip()
            # datetime.fromisoformat() only accepts a trailing "Z" on 3.11+.
            if text.endswith("Z"):
                text = text[:-1] + "+00:00"
            try:
                return datetime.fromisoformat(text)
            except ValueError:
                return value
        return value

    def to_row(self) -> list[str]:
        """Return the eight display cells for this experiment.

        Order: shortened id, truncated name, sessions, events, messages,
        cost, tokens, creation time (``HH:MM``). When ``created_at`` is not a
        ``datetime`` its text form is shown instead (empty for ``None``).
        """
        if isinstance(self.created_at, datetime):
            created = self.created_at.strftime("%H:%M")
        elif self.created_at is None:
            created = ""
        else:
            created = str(self.created_at)

        return [
            self.exp_id[:8],  # Shortened ID
            self.name[:20],  # Truncated name
            str(self.sessions),
            str(self.events),
            str(self.messages),
            f"${self.cost:.4f}",
            f"{self.tokens:,}",
            created,
        ]
94
+
95
+
96
class ExperimentTable(DataTable):
    """DataTable listing every experiment with its aggregate statistics.

    ``experiments`` holds the ``ExperimentRow`` objects in the same order as
    the rows added to the table, so a cursor row index maps back to its
    experiment.
    """

    # Events and messages are aggregated per session in their own subqueries
    # BEFORE being joined. Joining both raw tables to the same session row
    # yields (events x messages) rows per session, which multiplies the summed
    # cost and tokens by the number of messages in that session.
    # COALESCE keeps the aggregates non-NULL for experiments without sessions.
    _EXPERIMENT_STATS_SQL = """
        SELECT
            e.experiment_id,
            e.name,
            e.description,
            e.created_at,
            COUNT(DISTINCT st.session_id) AS num_sessions,
            COALESCE(SUM(ev.num_events), 0) AS num_events,
            COALESCE(SUM(m.num_messages), 0) AS num_messages,
            COALESCE(SUM(ev.cost_usd), 0) / 100.0 AS total_cost,
            COALESCE(SUM(ev.total_tokens), 0) AS total_tokens
        FROM experiments e
        LEFT JOIN (
            SELECT DISTINCT experiment_id, session_id
            FROM session_traces
        ) st ON e.experiment_id = st.experiment_id
        LEFT JOIN (
            SELECT
                session_id,
                COUNT(DISTINCT id) AS num_events,
                SUM(CASE WHEN event_type = 'cais' THEN cost_usd ELSE 0 END) AS cost_usd,
                SUM(CASE WHEN event_type = 'cais' THEN total_tokens ELSE 0 END) AS total_tokens
            FROM events
            GROUP BY session_id
        ) ev ON st.session_id = ev.session_id
        LEFT JOIN (
            SELECT session_id, COUNT(DISTINCT id) AS num_messages
            FROM messages
            GROUP BY session_id
        ) m ON st.session_id = m.session_id
        GROUP BY e.experiment_id, e.name, e.description, e.created_at
        ORDER BY e.created_at DESC
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.experiments: list[ExperimentRow] = []
        self.selected_exp_id: str | None = None

    def setup_table(self):
        """Initialize table columns."""
        self.add_columns("ID", "Name", "Sessions", "Events", "Messages", "Cost", "Tokens", "Time")

    async def refresh_data(self, db_manager: NativeLibsqlTraceManager | None) -> None:
        """Reload all experiments and their statistics from the database.

        With no ``db_manager`` the table is simply emptied. Query or
        conversion failures are logged and not re-raised, so a failed refresh
        never takes the dashboard down.
        """
        if not db_manager:
            # Database not available, clear the table
            self.experiments.clear()
            self.clear()
            return

        try:
            df = await db_manager.query_traces(self._EXPERIMENT_STATS_SQL)

            # Only drop the old rows once the query has succeeded.
            self.experiments.clear()
            self.clear()

            if not df.empty:
                for _, row in df.iterrows():
                    exp_row = ExperimentRow(
                        exp_id=row["experiment_id"],
                        name=row["name"],
                        description=row["description"],
                        created_at=row["created_at"],
                        sessions=int(row["num_sessions"] or 0),
                        events=int(row["num_events"] or 0),
                        messages=int(row["num_messages"] or 0),
                        cost=float(row["total_cost"] or 0.0),
                        tokens=int(row["total_tokens"] or 0),
                    )
                    self.experiments.append(exp_row)
                    self.add_row(*exp_row.to_row(), key=exp_row.exp_id)

        except Exception as e:
            logging.error(f"Failed to refresh experiments: {e}")

    def get_selected_experiment(self) -> ExperimentRow | None:
        """Return the experiment under the table cursor, or ``None``."""
        if 0 <= self.cursor_row < len(self.experiments):
            return self.experiments[self.cursor_row]
        return None
164
+
165
+
166
class ExperimentDetail(Static):
    """Panel showing the full statistics of the selected experiment."""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        self.current_experiment: ExperimentRow | None = None

    def update_experiment(self, experiment: ExperimentRow | None):
        """Remember *experiment* and re-render the panel for it.

        A placeholder prompt is shown when no experiment is given.
        """
        self.current_experiment = experiment

        if not experiment:
            self.update("Select an experiment to view details")
            return

        # Built line by line; an empty entry yields a blank separator line.
        summary_lines = [
            f"🔬 **{experiment.name}**",
            f"ID: {experiment.exp_id}",
            f"Description: {experiment.description or 'No description'}",
            "",
            "📊 **Statistics**",
            f"Sessions: {experiment.sessions}",
            f"Events: {experiment.events}",
            f"Messages: {experiment.messages}",
            f"Cost: ${experiment.cost:.4f}",
            f"Tokens: {experiment.tokens:,}",
            "",
            f"🕒 **Created**: {experiment.created_at.strftime('%Y-%m-%d %H:%M:%S')}",
        ]
        self.update("\n".join(summary_lines).strip())
195
+
196
+
197
class DatabaseStatus(Static):
    """One-line widget showing the database connection state."""

    # Reactive attributes read by render().
    connection_status = reactive("🔴 Disconnected")
    db_info = reactive("")

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def render(self) -> str:
        """Return ``Database: <status>`` plus `` | <db_info>`` when set."""
        status_line = f"Database: {self.connection_status}"
        if self.db_info:
            status_line += f" | {self.db_info}"
        return status_line

    def set_connected(self, url: str, db_name: str = ""):
        """Mark the database as connected and show a short label for *url*.

        ``db_info`` is only overwritten when *db_name* is non-empty.
        """
        self.connection_status = f"🟢 {self._describe_target(url)}"

        if db_name:
            self.db_info = f"[{db_name}]"

    @staticmethod
    def _describe_target(url: str) -> str:
        """Return a short label for *url*.

        SQLite URLs yield the database file name, other URLs yield
        ``host[:port]``. When neither can be determined the generic
        ``"Connected"`` is returned instead of an empty or ``"None"`` label.
        """
        from pathlib import Path

        if "sqlite" in url:
            # Extract just the filename for cleaner display
            label = Path(url.split("///")[-1]).name
        else:
            try:
                parsed = urlparse(url)
                host = parsed.hostname
                # .port raises ValueError for a non-numeric or out-of-range port.
                port = parsed.port if host else None
            except ValueError:
                return "Connected"

            if host:
                label = f"{host}:{port}" if port else host
            else:
                # No network location (e.g. a bare file path): show the file name.
                label = Path(parsed.path).name

        return label or "Connected"

    def set_disconnected(self, error: str = ""):
        """Mark the database as disconnected, optionally appending *error*."""
        error_text = f" - {error}" if error else ""
        self.connection_status = f"🔴 Disconnected{error_text}"
        self.db_info = ""

    def set_db_selector(self, current: int, total: int):
        """Show database selector info.

        *current* is zero-based; the hint is only shown when there is more
        than one database to switch between.
        """
        if total > 1:
            self.db_info = f"DB {current + 1}/{total} (n/p to switch)"
        else:
            self.db_info = ""
241
+
242
+
243
+ class BalanceStatus(Static):
244
+ """Display balance and spending information (local + global)."""
245
+
246
+ # Global (backend API)
247
+ global_balance = reactive("$0.00")
248
+ global_spend_24h = reactive("$0.00")
249
+ global_spend_7d = reactive("$0.00")
250
+ global_status = reactive("⏳")
251
+
252
+ # Local (database)
253
+ local_traces = reactive(0)
254
+ local_cost = reactive("$0.00")
255
+ local_tokens = reactive(0)
256
+ local_tasks = reactive([]) # List of (task_name, count) tuples
257
+ local_status = reactive("⏳")
258
+
259
+ def __init__(self, **kwargs):
260
+ super().__init__(**kwargs)
261
+
262
+ def render(self) -> str:
263
+ # Format tokens safely
264
+ if isinstance(self.local_tokens, int) and self.local_tokens > 0:
265
+ if self.local_tokens >= 1_000_000:
266
+ tokens_str = f"{self.local_tokens / 1_000_000:.1f}M"
267
+ elif self.local_tokens >= 1_000:
268
+ tokens_str = f"{self.local_tokens / 1_000:.1f}K"
269
+ else:
270
+ tokens_str = f"{self.local_tokens}"
271
+ else:
272
+ tokens_str = str(self.local_tokens)
273
+
274
+ # Format tasks - show top 3 only
275
+ tasks_str = ""
276
+ if self.local_tasks and len(self.local_tasks) > 0:
277
+ top_tasks = self.local_tasks[:3]
278
+ task_lines = [f"{name} ({count})" for name, count in top_tasks]
279
+ tasks_str = " | " + ", ".join(task_lines)
280
+ if len(self.local_tasks) > 3:
281
+ tasks_str += f", +{len(self.local_tasks) - 3}"
282
+
283
+ # Compact single-line format
284
+ return f"""[b]Local[/b] {self.local_status} {self.local_traces} traces | {self.local_cost} | {tokens_str} tokens{tasks_str}
285
+
286
+ [b]Global[/b] {self.global_status} {self.global_balance} | 24h: {self.global_spend_24h} | 7d: {self.global_spend_7d}"""
287
+
288
+ def update_global(self, balance: float, spend_24h: float, spend_7d: float):
289
+ """Update global backend balance information."""
290
+ self.global_balance = f"${balance:.2f}"
291
+ self.global_spend_24h = f"${spend_24h:.2f}"
292
+ self.global_spend_7d = f"${spend_7d:.2f}"
293
+ self.global_status = "✅"
294
+
295
+ def update_local(self, traces: int, cost: float, tokens: int, tasks: list[tuple[str, int]] | None = None):
296
+ """Update local database statistics."""
297
+ self.local_traces = traces
298
+ self.local_cost = f"${cost:.4f}"
299
+ self.local_tokens = tokens
300
+ self.local_tasks = tasks or []
301
+ self.local_status = "✅"
302
+
303
+ def set_global_loading(self):
304
+ """Show loading state for global data."""
305
+ self.global_balance = "..."
306
+ self.global_spend_24h = "..."
307
+ self.global_spend_7d = "..."
308
+ self.global_status = "⏳"
309
+
310
+ def set_local_loading(self):
311
+ """Show loading state for local data."""
312
+ self.local_traces = 0
313
+ self.local_cost = "..."
314
+ self.local_tokens = 0
315
+ self.local_tasks = []
316
+ self.local_status = "⏳"
317
+
318
def set_global_error(self, error: str):
    """Put the global (backend) fields into their error state.

    Args:
        error: Description of the failure. It is accepted for interface
            compatibility but is not stored or displayed by this method.
    """
    self.global_balance = "Error"
    # Spend figures are meaningless after a failed fetch, so show dashes.
    self.global_spend_24h = self.global_spend_7d = "-"
    self.global_status = "❌"
324
+
325
def set_local_error(self, error: str):
    """Reset the local statistics and show the error state.

    Args:
        error: Description of the failure. It is accepted for interface
            compatibility but is not stored or displayed by this method.
    """
    self.local_traces, self.local_cost, self.local_tokens = 0, "Error", 0
    self.local_tasks = []
    self.local_status = "❌"
332
+
333
def set_global_unavailable(self):
    """Mark the global (backend) data as unavailable, e.g. when no API key is set."""
    self.global_balance = self.global_spend_24h = self.global_spend_7d = "N/A"
    self.global_status = "⚪"
339
+
340
+
341
class ActiveRunsTable(DataTable):
    """Display currently active/running sessions.

    Rows are rebuilt from the database on every ``refresh_data`` call. The
    dictionaries describing the displayed rows are kept in ``active_runs``.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # One dict per displayed row (session_id, experiment_name, started_at,
        # duration, events, status), in display order.
        self.active_runs: list[dict] = []

    def setup_table(self):
        """Initialize table columns."""
        self.add_columns("Session", "Experiment", "Started", "Duration", "Events", "Status")

    @staticmethod
    def _is_missing(value) -> bool:
        """Return True for None and for NaN/NaT-like values from the query result."""
        if value is None:
            return True
        try:
            # NaN and NaT are the only values that compare unequal to themselves.
            return bool(value != value)
        except (TypeError, ValueError):
            return False

    @staticmethod
    def _to_datetime(value):
        """Convert a timestamp cell (string or datetime-like) to a datetime, or None if missing."""
        if ActiveRunsTable._is_missing(value):
            return None
        if isinstance(value, str):
            from dateutil import parser as date_parser

            return date_parser.parse(value)
        return value

    @staticmethod
    def _seconds_since(moment) -> float:
        """Return the non-negative number of seconds elapsed since ``moment``.

        ``datetime.now`` is called with the timestamp's own tzinfo so that
        timezone-aware and naive timestamps are each compared against a
        matching "now" instead of raising ``TypeError``.
        """
        now = datetime.now(getattr(moment, "tzinfo", None))
        return max((now - moment).total_seconds(), 0.0)

    @staticmethod
    def _format_duration(seconds: float) -> str:
        """Format a duration as ``"<m>m"`` below one hour, else ``"<h>h <m>m"``."""
        if seconds < 3600:
            return f"{int(seconds // 60)}m"
        hours, remainder = divmod(int(seconds), 3600)
        return f"{hours}h {remainder // 60}m"

    @staticmethod
    def _status_for(idle_seconds: float) -> str:
        """Map the time since the last event to a status label."""
        if idle_seconds < 60:  # Active in last minute
            return "🟢 Active"
        if idle_seconds < 300:  # Active in last 5 minutes
            return "🟡 Recent"
        return "🟠 Idle"

    async def refresh_data(self, db_manager: NativeLibsqlTraceManager | None) -> None:
        """Refresh active runs data from database.

        Args:
            db_manager: Trace manager to query, or None when no database is
                available (the table is then simply cleared).

        Query or row-processing failures are logged and never propagate.
        """
        if not db_manager:
            # Database not available, clear the table
            self.active_runs.clear()
            self.clear()
            return

        try:
            # Sessions created within the last 5 minutes are candidates.
            cutoff_time = datetime.now() - timedelta(minutes=5)

            df = await db_manager.query_traces("""
                WITH recent_sessions AS (
                    SELECT
                        st.session_id,
                        st.experiment_id,
                        st.created_at,
                        e.name as experiment_name,
                        COUNT(ev.id) as event_count,
                        MAX(ev.created_at) as last_event_time
                    FROM session_traces st
                    LEFT JOIN experiments e ON st.experiment_id = e.experiment_id
                    LEFT JOIN events ev ON st.session_id = ev.session_id
                    WHERE st.created_at >= :cutoff_time
                    GROUP BY st.session_id, st.experiment_id, st.created_at, e.name
                )
                SELECT
                    session_id,
                    experiment_id,
                    experiment_name,
                    created_at,
                    event_count,
                    last_event_time
                FROM recent_sessions
                ORDER BY last_event_time DESC
            """, {"cutoff_time": cutoff_time})

            self.active_runs.clear()
            self.clear()

            if df.empty:
                return

            for _, row in df.iterrows():
                session_id = str(row["session_id"])

                raw_name = row["experiment_name"]
                # The LEFT JOIN yields NULL/NaN when the session has no experiment.
                if self._is_missing(raw_name) or not raw_name:
                    experiment_name = "Unknown"
                else:
                    experiment_name = str(raw_name)

                # Parse datetime strings
                try:
                    started_at = self._to_datetime(row["created_at"])
                    last_event_time = self._to_datetime(row["last_event_time"])
                except Exception as e:
                    logging.error(f"Failed to parse datetime: {e}")
                    continue

                if started_at is None:
                    logging.error(f"Failed to parse datetime: missing created_at for session {session_id}")
                    continue
                if last_event_time is None:
                    # A session without events has no last-event time; fall
                    # back to its start time rather than failing the refresh.
                    last_event_time = started_at

                duration_str = self._format_duration(self._seconds_since(started_at))
                # Status based on recent activity
                status = self._status_for(self._seconds_since(last_event_time))

                run_info = {
                    "session_id": session_id,
                    "experiment_name": experiment_name,
                    "started_at": started_at,
                    "duration": duration_str,
                    "events": int(row["event_count"]),
                    "status": status,
                }
                self.active_runs.append(run_info)
                self.add_row(
                    session_id[:8],  # Shortened session ID
                    experiment_name[:20],  # Truncated name
                    started_at.strftime("%H:%M:%S"),
                    duration_str,
                    str(run_info["events"]),
                    status,
                    key=session_id,
                )

        except Exception as e:
            logging.error(f"Failed to refresh active runs: {e}")
455
+
456
+
457
def find_databases() -> list[tuple[str, str]]:
    """Find all available databases in common locations.

    Searches ``traces/v3``, ``traces`` and ``.`` (relative to the current
    working directory) recursively for ``*.db`` files. Each file is reported
    once even though the search roots overlap, and files within a root are
    visited in sorted order so the result is deterministic.

    Returns:
        List of ``(name, path)`` tuples. ``name`` is the file stem, with
        ``" (default)"`` appended to the first entry; ``path`` is relative to
        the current working directory when possible. If nothing is found, a
        single default entry pointing at ``traces/v3/synth_ai.db`` is returned.
    """
    from pathlib import Path

    databases: list[tuple[str, str]] = []
    seen = set()  # resolved paths already reported
    search_paths = [
        "traces/v3",
        "traces",
        ".",
    ]

    for search_path in search_paths:
        try:
            search_dir = Path(search_path)
            if not search_dir.exists():
                continue

            cwd = Path.cwd().resolve()

            # Find all .db files
            for db_file in sorted(search_dir.glob("**/*.db")):
                if not db_file.is_file():
                    continue

                resolved = db_file.resolve()
                if resolved in seen:
                    continue
                seen.add(resolved)

                # Use relative path from current directory. Both sides must be
                # absolute: relative_to() raises ValueError when a relative
                # path is compared against an absolute one.
                try:
                    rel_path = str(resolved.relative_to(cwd))
                except ValueError:
                    # e.g. a symlink that resolves outside the working directory
                    rel_path = str(db_file)

                # Create a friendly name
                name = db_file.stem  # filename without .db
                if not databases:
                    name = f"{name} (default)"
                databases.append((name, rel_path))
        except Exception as e:
            # Discovery is best-effort: an unreadable directory must not
            # prevent the dashboard from starting.
            logging.debug(f"Error scanning {search_path}: {e}")

    # If no databases found, return default
    if not databases:
        databases.append(("synth_ai (default)", "traces/v3/synth_ai.db"))

    return databases
495
+
496
+
497
class SynthDashboard(App if _TEXTUAL_AVAILABLE else object):
    """Main Synth AI TUI Dashboard application.

    Shows experiments, recently active sessions, local trace statistics and
    the backend balance. Local data comes from the currently selected
    database; the balance comes from the backend HTTP API when an API key is
    present in the environment.
    """

    CSS = """
    Screen {
        layout: grid;
        grid-columns: 1fr 1fr 1fr;
        grid-rows: auto 1fr 1fr auto;
        grid-gutter: 1;
    }

    #header {
        column-span: 3;
        height: 3;
    }

    #experiments-table {
        row-span: 2;
    }

    #active-runs-panel {
        column-span: 1;
    }

    #balance-status {
        column-span: 1;
    }

    #experiment-detail {
        column-span: 2;
        height: 1fr;
    }

    #status-bar {
        column-span: 3;
        height: 3;
    }

    ExperimentTable {
        height: 100%;
    }

    ActiveRunsTable {
        height: 100%;
    }

    ExperimentDetail {
        border: solid $primary;
        padding: 1;
        height: 100%;
    }

    BalanceStatus {
        border: solid $primary;
        padding: 1;
        height: 100%;
    }

    DatabaseStatus {
        height: 1;
        padding: 0 1;
    }

    .section-title {
        text-style: bold;
        height: 1;
    }
    """

    BINDINGS = [
        Binding("q", "quit", "Quit"),
        Binding("r", "refresh", "Refresh"),
        Binding("n", "next_db", "Next DB"),
        Binding("p", "prev_db", "Prev DB"),
        Binding("d", "toggle_debug", "Debug"),
        ("ctrl+c", "quit", "Quit"),
    ]

    def __init__(self, db_url: str = "sqlite+aiosqlite:///traces/v3/synth_ai.db"):
        """Create the dashboard.

        Args:
            db_url: Database URL to open first. If it matches one of the
                discovered databases, that one is selected. If it points at
                an existing SQLite file that was not discovered, the file is
                added to the front of the list and selected. Otherwise the
                first discovered database is used.
        """
        super().__init__()
        self.db_url = db_url
        self.db_manager: NativeLibsqlTraceManager | None = None
        self.refresh_timer: Timer | None = None

        # Database discovery and selection
        self.available_dbs: list[tuple[str, str]] = find_databases()
        self.current_db_index: int = 0

        # Log discovered databases
        logging.info(f"Found {len(self.available_dbs)} database(s):")
        for idx, (name, path) in enumerate(self.available_dbs):
            logging.info(f"  [{idx+1}] {name}: {path}")

        # Try to find the initial db_url in available_dbs. Match on the whole
        # path or on a path-segment boundary; a plain substring test would let
        # "a.db" match ".../data.db".
        normalized_url = db_url.replace("\\", "/")
        url_path = normalized_url.split(":///", 1)[-1]
        matched = False
        for idx, (name, path) in enumerate(self.available_dbs):
            candidate = path.replace("\\", "/")
            if url_path == candidate or normalized_url.endswith("/" + candidate):
                self.current_db_index = idx
                logging.info(f"Using database: {name} ({path})")
                matched = True
                break

        # An explicitly requested SQLite file that discovery did not find
        # would otherwise be silently replaced by the first discovered entry.
        if not matched and db_url.startswith("sqlite") and ":///" in db_url:
            custom_path = db_url.split(":///", 1)[1]
            if custom_path and os.path.isfile(custom_path):
                custom_name = os.path.splitext(os.path.basename(custom_path))[0]
                self.available_dbs.insert(0, (f"{custom_name} (custom)", custom_path))
                self.current_db_index = 0
                logging.info(f"Using database: {custom_name} ({custom_path})")

    def compose(self) -> ComposeResult:
        """Create the UI layout."""
        yield Header(show_clock=True)

        with Container(id="experiments-table"):
            yield Static("🧪 Experiments", classes="section-title")
            yield ExperimentTable(id="experiments")

        with Container(id="active-runs-panel"):
            yield Static("⚡ Active Runs", classes="section-title")
            yield ActiveRunsTable(id="active-runs")

        with Container(id="balance-status"):
            yield Static("💰 Balance & Stats", classes="section-title")
            yield BalanceStatus(id="balance")

        with Container(id="experiment-detail"):
            yield Static("📋 Details", classes="section-title")
            yield ExperimentDetail(id="detail")

        with Container(id="status-bar"):
            yield DatabaseStatus(id="db-status")
            yield Footer()

    async def on_mount(self) -> None:
        """Initialize the app when mounted."""
        # Setup database connection - make it optional
        await self._connect_to_database()

        # Setup tables
        exp_table = self.query_one("#experiments", ExperimentTable)
        exp_table.setup_table()

        active_runs_table = self.query_one("#active-runs", ActiveRunsTable)
        active_runs_table.setup_table()

        # Set balance loading state
        balance_widget = self.query_one("#balance", BalanceStatus)
        balance_widget.set_global_loading()
        balance_widget.set_local_loading()

        # Initial data load
        await self.action_refresh()

        # Start auto-refresh timer (every 5 seconds)
        self.refresh_timer = self.set_interval(5.0, self._auto_refresh_data)

    async def _refresh_all(self, *, show_loading: bool) -> None:
        """Refresh every panel; shared by the manual and automatic refresh paths.

        Args:
            show_loading: When True, the balance panel is first switched to
                its loading placeholders (used for manual refreshes).
        """
        exp_table = self.query_one("#experiments", ExperimentTable)
        active_runs_table = self.query_one("#active-runs", ActiveRunsTable)
        balance_widget = self.query_one("#balance", BalanceStatus)

        if show_loading:
            balance_widget.set_global_loading()
            balance_widget.set_local_loading()

        if self.db_manager:
            await exp_table.refresh_data(self.db_manager)
            await active_runs_table.refresh_data(self.db_manager)
            await self._refresh_local_stats(balance_widget)
        elif show_loading:
            # Without a connection nothing would replace the loading
            # placeholder set above; this call reports the error state.
            await self._refresh_local_stats(balance_widget)

        # Always try to refresh global balance (independent of local DB)
        await self._refresh_global_balance(balance_widget)

    async def _auto_refresh_data(self) -> None:
        """Auto-refresh data periodically."""
        await self._refresh_all(show_loading=False)

    async def action_refresh(self) -> None:
        """Manual refresh action."""
        await self._refresh_all(show_loading=True)

    @staticmethod
    def _number_or_zero(value) -> float:
        """Return ``value`` as a float, mapping None, NaN and unparsable values to 0.0."""
        try:
            number = float(value)
        except (TypeError, ValueError):
            return 0.0
        # NaN is the only float that is not equal to itself.
        return 0.0 if number != number else number

    async def _refresh_local_stats(self, balance_widget: BalanceStatus) -> None:
        """Refresh local database statistics.

        Failures are logged and shown as the widget's local error state.
        """
        if not self.db_manager:
            logging.warning("No database manager available for local stats")
            balance_widget.set_local_error("No database")
            return

        try:
            logging.info("Fetching local stats from database...")
            # Query local trace statistics
            df = await self.db_manager.query_traces("""
                SELECT
                    COUNT(DISTINCT st.session_id) as num_traces,
                    SUM(CASE WHEN ev.event_type = 'cais' THEN ev.cost_usd ELSE 0 END) / 100.0 as total_cost,
                    SUM(CASE WHEN ev.event_type = 'cais' THEN ev.total_tokens ELSE 0 END) as total_tokens
                FROM session_traces st
                LEFT JOIN events ev ON st.session_id = ev.session_id
            """)

            # Query task/environment breakdown from metadata
            task_df = await self.db_manager.query_traces("""
                SELECT
                    json_extract(metadata, '$.env_name') as task_name,
                    COUNT(DISTINCT session_id) as trace_count
                FROM session_traces
                WHERE json_extract(metadata, '$.env_name') IS NOT NULL
                GROUP BY task_name
                ORDER BY trace_count DESC
                LIMIT 10
            """)

            if not df.empty:
                row = df.iloc[0]
                # SUM() over zero rows is NULL, which may arrive as None or
                # NaN; both must count as zero rather than raise.
                num_traces = int(self._number_or_zero(row["num_traces"]))
                total_cost = self._number_or_zero(row["total_cost"])
                total_tokens = int(self._number_or_zero(row["total_tokens"]))

                # Parse task data
                tasks = []
                if not task_df.empty:
                    for _, task_row in task_df.iterrows():
                        task_name = task_row["task_name"]
                        count = int(task_row["trace_count"])
                        if task_name:
                            tasks.append((str(task_name), count))

                logging.info(f"Local stats: {num_traces} traces, ${total_cost:.4f}, {total_tokens} tokens, {len(tasks)} tasks")
                balance_widget.update_local(num_traces, total_cost, total_tokens, tasks)
            else:
                logging.warning("Query returned empty dataframe")
                balance_widget.update_local(0, 0.0, 0, [])

        except Exception as e:
            logging.error(f"Failed to refresh local stats: {e}", exc_info=True)
            balance_widget.set_local_error(str(e)[:20])

    @staticmethod
    def _fetch_global_balance(api_key: str, backend_url: str) -> tuple[float, float, float]:
        """Fetch ``(balance, spend_24h, spend_7d)`` in dollars from the backend.

        This performs blocking HTTP requests and is meant to run in a worker
        thread. A failure of the balance request propagates to the caller; a
        failure of the optional usage request yields zero spend figures.
        """
        base_url = backend_url.rstrip("/")
        headers = {"Authorization": f"Bearer {api_key}"}

        # Fetch balance
        response = requests.get(f"{base_url}/balance/current", headers=headers, timeout=5)
        response.raise_for_status()
        balance = float(response.json().get("balance_dollars", 0.0))

        spend_24h = 0.0
        spend_7d = 0.0

        # Try to get usage data
        try:
            usage_response = requests.get(
                f"{base_url}/balance/usage/windows",
                params={"hours": "24,168"},
                headers=headers,
                timeout=5,
            )
            if usage_response.ok:
                usage_data = usage_response.json()
                windows = {
                    int(r.get("window_hours")): r
                    for r in usage_data.get("windows", [])
                    if isinstance(r.get("window_hours"), int)
                }
                if 24 in windows:
                    spend_24h = float(windows[24].get("total_spend_cents", 0)) / 100.0
                if 168 in windows:
                    spend_7d = float(windows[168].get("total_spend_cents", 0)) / 100.0
        except Exception:
            # Fallback to just balance
            spend_24h = 0.0
            spend_7d = 0.0

        return balance, spend_24h, spend_7d

    async def _refresh_global_balance(self, balance_widget: BalanceStatus) -> None:
        """Refresh balance information from backend API.

        The HTTP requests run in a worker thread so that slow or unreachable
        backends do not block the UI event loop.
        """
        import asyncio  # local import keeps this class independent of the module's import list

        try:
            # Try to get balance from environment or API
            api_key = os.getenv("SYNTH_API_KEY") or os.getenv("SYNTH_BACKEND_API_KEY")
            if not api_key:
                balance_widget.set_global_unavailable()
                return

            # Try to get backend URL from environment
            backend_url = os.getenv("SYNTH_BACKEND_BASE_URL") or "https://agent-learning.onrender.com/api/v1"

            balance, spend_24h, spend_7d = await asyncio.to_thread(
                self._fetch_global_balance, api_key, backend_url
            )
            balance_widget.update_global(balance, spend_24h, spend_7d)

        except Exception as e:
            # Only show error if it's not just "endpoint not available"
            error_msg = str(e)
            if "500" in error_msg or "Internal Server Error" in error_msg:
                # Backend endpoint not implemented yet
                balance_widget.set_global_unavailable()
            else:
                balance_widget.set_global_error(error_msg[:30])

    async def action_quit(self) -> None:
        """Quit the application."""
        if self.refresh_timer:
            self.refresh_timer.stop()
        try:
            if self.db_manager:
                await self.db_manager.close()
        except Exception as e:
            logging.error(f"Failed to close database: {e}", exc_info=True)
        finally:
            # Exit even when closing the database fails.
            self.exit()

    async def _connect_to_database(self) -> None:
        """Connect to the current database.

        Closes any existing connection, opens the database selected by
        ``current_db_index`` and updates the status widgets. On failure the
        manager is reset to None and the widgets show the error.
        """
        db_status = self.query_one("#db-status", DatabaseStatus)
        balance_widget = self.query_one("#balance", BalanceStatus)

        try:
            # Close existing connection if any
            if self.db_manager:
                await self.db_manager.close()
                self.db_manager = None

            # Get current database info
            db_name, db_path = self.available_dbs[self.current_db_index]
            self.db_url = f"sqlite+aiosqlite:///{db_path}"

            logging.info(f"Connecting to database: {db_name} ({db_path})")

            self.db_manager = NativeLibsqlTraceManager(self.db_url)
            if self.db_manager:
                await self.db_manager.initialize()
                db_status.set_connected(self.db_url, db_name)
                db_status.set_db_selector(self.current_db_index, len(self.available_dbs))

                # Immediately refresh local stats after connecting
                logging.info("Refreshing local stats after connection...")
                await self._refresh_local_stats(balance_widget)
            else:
                db_status.set_disconnected("Database manager not available")
                balance_widget.set_local_error("No manager")
        except (ImportError, ModuleNotFoundError):
            # Database dependencies not available
            db_status.set_disconnected("Database dependencies missing (libsql)")
            self.db_manager = None
            balance_widget.set_local_error("No libsql")
        except Exception as e:
            logging.error(f"Failed to connect to database: {e}", exc_info=True)
            db_status.set_disconnected(str(e))
            self.db_manager = None
            balance_widget.set_local_error(str(e)[:15])

    async def action_next_db(self) -> None:
        """Switch to next database."""
        if len(self.available_dbs) <= 1:
            return

        self.current_db_index = (self.current_db_index + 1) % len(self.available_dbs)
        await self._connect_to_database()
        await self.action_refresh()

    async def action_prev_db(self) -> None:
        """Switch to previous database."""
        if len(self.available_dbs) <= 1:
            return

        self.current_db_index = (self.current_db_index - 1) % len(self.available_dbs)
        await self._connect_to_database()
        await self.action_refresh()

    def action_toggle_debug(self) -> None:
        """Toggle debug mode (currently a no-op placeholder)."""
        # Could add debug panel or logging level toggle
        pass

    @on(DataTable.RowHighlighted, "#experiments")
    def on_experiment_selected(self, event: DataTable.RowHighlighted) -> None:
        """Handle experiment selection by showing the highlighted experiment in the detail panel."""
        exp_table = self.query_one("#experiments", ExperimentTable)
        selected_exp = exp_table.get_selected_experiment()

        detail_panel = self.query_one("#detail", ExperimentDetail)
        detail_panel.update_experiment(selected_exp)
874
+
875
+
876
def main(argv: list[str] | None = None):
    """Main entry point for the dashboard.

    Args:
        argv: Command-line arguments to parse; None means ``sys.argv[1:]``.

    Environment:
        TUI_DB_URL: Default for ``--url``.
        TUI_DEBUG: Enables debug logging by default unless it is empty or one
            of ``0``, ``false``, ``no``, ``off`` (case-insensitive).
    """
    # Check if textual is available
    if not _TEXTUAL_AVAILABLE:
        print("❌ Textual library is not available. Please install it with: pip install textual")
        return

    import argparse
    import os

    # bool() on the raw string would treat TUI_DEBUG=0 or "false" as enabled.
    debug_env = os.getenv("TUI_DEBUG", "").strip().lower()
    debug_default = debug_env not in ("", "0", "false", "no", "off")

    parser = argparse.ArgumentParser(description="Synth AI Interactive Dashboard")
    parser.add_argument(
        "-u",
        "--url",
        default=os.getenv("TUI_DB_URL", "sqlite+aiosqlite:///traces/v3/synth_ai.db"),
        help="Database URL (default: traces/v3/synth_ai.db)",
    )
    parser.add_argument("--debug", action="store_true", default=debug_default, help="Enable debug logging")

    args = parser.parse_args(argv)

    if args.debug:
        logging.basicConfig(level=logging.DEBUG)

    # Run the dashboard
    app = SynthDashboard(db_url=args.url)
    app.run()
903
+
904
+
905
# Script entry point: start the dashboard only when this module is run directly.
if __name__ == "__main__":
    main()