synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those package versions as they appear in their respective public registries.
Files changed (890)
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,1293 @@
1
+ """LibSQL-native trace manager prototype.
2
+
3
+ This module provides the Turso/libsql-backed trace storage implementation. It
4
+ mirrors the public surface area of the historical SQLAlchemy manager while
5
+ executing all operations directly via libsql.
6
+ """
7
+
8
+ from __future__ import annotations
9
+
10
+ import asyncio
11
+ import json
12
+ import logging
13
+ import re
14
+ from collections.abc import Callable
15
+ from dataclasses import asdict, dataclass
16
+ from datetime import UTC, datetime
17
+ from pathlib import Path
18
+ from typing import TYPE_CHECKING, Any, cast
19
+ from urllib.parse import parse_qsl, urlencode, urlparse, urlunparse
20
+
21
+ import httpx
22
+ import libsql
23
+ from sqlalchemy.engine import make_url
24
+
25
+ from ..abstractions import (
26
+ EnvironmentEvent,
27
+ LMCAISEvent,
28
+ RuntimeEvent,
29
+ SessionMessageContent,
30
+ SessionTrace,
31
+ )
32
+ from ..config import CONFIG
33
+ from ..storage.base import TraceStorage
34
+ from .models import analytics_views
35
+
36
+ if TYPE_CHECKING:
37
+ from sqlite3 import Connection as LibsqlConnection
38
+ else: # pragma: no cover - runtime fallback for typing only
39
+ LibsqlConnection = Any # type: ignore[assignment]
40
+
41
+ _LIBSQL_CONNECT_ATTR = getattr(libsql, "connect", None)
42
+ if _LIBSQL_CONNECT_ATTR is None: # pragma: no cover - defensive guard
43
+ raise RuntimeError("libsql.connect is required for NativeLibsqlTraceManager")
44
+ _libsql_connect: Callable[..., LibsqlConnection] = cast(
45
+ Callable[..., LibsqlConnection],
46
+ _LIBSQL_CONNECT_ATTR,
47
+ )
48
+
49
+ try: # pragma: no cover - exercised only when pandas present
50
+ import pandas as pd # type: ignore
51
+ except Exception: # pragma: no cover
52
+ pd = None # type: ignore[assignment]
53
+
54
+ logger = logging.getLogger(__name__)
55
+
56
+
57
+ @dataclass(slots=True)
58
+ class _ConnectionTarget:
59
+ """Resolved connection target for libsql."""
60
+
61
+ database: str
62
+ sync_url: str | None = None
63
+ auth_token: str | None = None
64
+
65
+
66
def _strip_auth_component(url: str) -> tuple[str, str | None]:
    """Remove the ``auth_token`` query parameter from ``url``.

    Args:
        url: Database URL or path, optionally carrying ``auth_token=...`` in
            its query string.

    Returns:
        A ``(sanitised_url, token)`` pair. ``token`` is ``None`` when the URL
        has no ``auth_token`` parameter. A URL without any query string is
        returned unchanged; otherwise the URL is rebuilt from the remaining
        query parameters.
    """
    parsed = urlparse(url)
    if not parsed.query:
        return url, None

    token: str | None = None
    remaining: list[tuple[str, str]] = []
    # Work on the (key, value) pair list rather than a dict so repeated keys
    # (e.g. ``tag=a&tag=b``) are all preserved in the sanitised URL.
    for key, value in parse_qsl(parsed.query, keep_blank_values=True):
        if key == "auth_token":
            # If the parameter is repeated, the last occurrence wins.
            token = value
        else:
            remaining.append((key, value))

    sanitised = urlunparse(parsed._replace(query=urlencode(remaining)))
    return sanitised, token
77
+
78
+
79
def _resolve_connection_target(db_url: str | None, auth_token: str | None) -> _ConnectionTarget:
    """Normalise the configured database URL into a libsql connection target.

    Args:
        db_url: Explicit database URL or path. ``CONFIG.db_url`` is used when
            this is falsy.
        auth_token: Explicit auth token. It takes precedence over an
            ``auth_token`` query parameter in the URL, which in turn takes
            precedence over ``CONFIG.auth_token``.

    Returns:
        A ``_ConnectionTarget``. Local SQLite targets carry only ``database``;
        ``libsql://``, ``http://`` and ``https://`` targets use the sanitised
        URL as both ``database`` and ``sync_url`` together with the effective
        token.

    Raises:
        RuntimeError: If the URL uses the retired ``sqlite+libsql://`` scheme,
            is a SQLite URL without a database path, or has a scheme this
            function does not recognise.
    """
    url = db_url or CONFIG.db_url
    sanitised, token_from_url = _strip_auth_component(url)
    effective_token = auth_token or token_from_url or CONFIG.auth_token

    # SQLAlchemy-compatible libsql scheme (`sqlite+libsql://<endpoint or path>`)
    if sanitised.startswith("sqlite+libsql://"):
        raise RuntimeError("sqlite+libsql scheme is no longer supported; use libsql://")

    # Plain SQLite files: file://, /absolute/path, or relative path.
    # These are connected to without sync_url or auth_token.
    if sanitised.startswith(("file://", "/")) or "://" not in sanitised:
        # Strip only the leading file:// prefix; a later "file://" substring
        # inside the path must be left intact.
        db_path = sanitised.removeprefix("file://")
        return _ConnectionTarget(database=db_path, sync_url=None, auth_token=None)

    # Native libsql URLs (`libsql://...`).
    if sanitised.startswith("libsql://"):
        return _ConnectionTarget(database=sanitised, sync_url=sanitised, auth_token=effective_token)

    # Fallback to SQLAlchemy URL parsing for anything else we missed. Only the
    # parse itself is guarded, so the explicit RuntimeError raised below is
    # not swallowed and replaced by the generic "unsupported" error.
    try:
        parsed = make_url(sanitised)
    except Exception:  # pragma: no cover - defensive guardrail
        logger.debug("Unable to parse db_url via SQLAlchemy", exc_info=True)
        parsed = None

    if parsed is not None:
        driver = parsed.drivername.lower()
        if driver.startswith("sqlite"):
            database = parsed.database or ""
            if database in {":memory:", ":memory"}:
                db_path = ":memory:"
            elif not database:
                raise RuntimeError("SQLite URL missing database path.")
            elif database.startswith("/"):
                # Absolute paths are passed through untouched.
                db_path = database
            else:
                # Relative paths are resolved against the current directory.
                db_path = str(Path(database).expanduser().resolve())
            return _ConnectionTarget(database=db_path, sync_url=None, auth_token=None)
        if driver.startswith("libsql"):
            database = parsed.render_as_string(hide_password=False)
            return _ConnectionTarget(database=database, sync_url=database, auth_token=effective_token)

    # Python libsql client uses HTTP API for http:// URLs, not Hrana WebSocket.
    # For local sqld with an http:// URL, the URL should point to the HTTP API
    # port (sqld uses two ports: Hrana WebSocket, e.g. 8080, and HTTP API,
    # e.g. 8081). libsql:// URLs were already handled above.
    if sanitised.startswith(("http://", "https://")):
        return _ConnectionTarget(database=sanitised, sync_url=sanitised, auth_token=effective_token)
    raise RuntimeError(f"Unsupported tracing database URL: {sanitised}")
130
+
131
+
132
def _json_dumps(value: Any) -> str | None:
    """Encode ``value`` as compact JSON text, passing ``None`` through.

    Objects the JSON encoder cannot handle natively are converted with
    ``isoformat()`` when they are ``datetime`` instances and with ``str()``
    otherwise.

    Returns:
        The JSON string (no whitespace after separators), or ``None`` when
        ``value`` is ``None``.
    """
    if value is None:
        return None

    def _fallback(item: Any):
        # Only reached for objects json cannot serialise on its own.
        return item.isoformat() if isinstance(item, datetime) else str(item)

    return json.dumps(value, default=_fallback, separators=(",", ":"))
143
+
144
+
145
def _maybe_datetime(value: Any) -> Any:
    """Parse ISO-8601 strings into ``datetime``; return anything else as-is.

    Strings that ``datetime.fromisoformat`` rejects are returned unchanged.
    """
    if not isinstance(value, str):
        # None, datetime instances and every other type pass straight through.
        return value
    try:
        return datetime.fromisoformat(value)
    except ValueError:
        return value
154
+
155
+
156
def _load_json(value: Any) -> Any:
    """Decode a stored JSON column value.

    Strings are parsed as JSON, yielding ``{}`` when parsing fails. ``None``
    and empty dicts/lists come back as ``{}``; non-empty dicts/lists and any
    other type are returned unchanged.
    """
    if isinstance(value, str):
        try:
            return json.loads(value)
        except (TypeError, ValueError):
            return {}
    if value is None or isinstance(value, (dict, list)):
        # NOTE: an empty list is falsy, so it is normalised to {} as well.
        return value or {}
    return value
165
+
166
+
167
+ _TABLE_DEFINITIONS: tuple[str, ...] = (
168
+ """
169
+ CREATE TABLE IF NOT EXISTS experiments (
170
+ experiment_id VARCHAR PRIMARY KEY,
171
+ name VARCHAR NOT NULL,
172
+ description TEXT,
173
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
174
+ updated_at DATETIME DEFAULT CURRENT_TIMESTAMP,
175
+ configuration TEXT,
176
+ metadata TEXT
177
+ )
178
+ """,
179
+ """
180
+ CREATE TABLE IF NOT EXISTS systems (
181
+ system_id VARCHAR PRIMARY KEY,
182
+ name VARCHAR NOT NULL,
183
+ system_type VARCHAR,
184
+ description TEXT,
185
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
186
+ metadata TEXT
187
+ )
188
+ """,
189
+ """
190
+ CREATE TABLE IF NOT EXISTS system_versions (
191
+ version_id VARCHAR PRIMARY KEY,
192
+ system_id VARCHAR NOT NULL,
193
+ version_number VARCHAR NOT NULL,
194
+ commit_hash VARCHAR,
195
+ created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
196
+ configuration TEXT,
197
+ metadata TEXT,
198
+ FOREIGN KEY(system_id) REFERENCES systems(system_id),
199
+ UNIQUE(system_id, version_number)
200
+ )
201
+ """,
202
+ """
203
+ CREATE TABLE IF NOT EXISTS experimental_systems (
204
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
205
+ experiment_id VARCHAR NOT NULL,
206
+ system_id VARCHAR NOT NULL,
207
+ version_id VARCHAR NOT NULL,
208
+ FOREIGN KEY(experiment_id) REFERENCES experiments(experiment_id),
209
+ FOREIGN KEY(system_id) REFERENCES systems(system_id),
210
+ FOREIGN KEY(version_id) REFERENCES system_versions(version_id)
211
+ )
212
+ """,
213
+ """
214
+ CREATE TABLE IF NOT EXISTS session_traces (
215
+ session_id VARCHAR PRIMARY KEY,
216
+ created_at DATETIME NOT NULL,
217
+ num_timesteps INTEGER NOT NULL,
218
+ num_events INTEGER NOT NULL,
219
+ num_messages INTEGER NOT NULL,
220
+ metadata TEXT,
221
+ experiment_id VARCHAR,
222
+ embedding VECTOR,
223
+ FOREIGN KEY(experiment_id) REFERENCES experiments(experiment_id)
224
+ )
225
+ """,
226
+ """
227
+ CREATE TABLE IF NOT EXISTS session_timesteps (
228
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
229
+ session_id VARCHAR NOT NULL,
230
+ step_id VARCHAR NOT NULL,
231
+ step_index INTEGER NOT NULL,
232
+ turn_number INTEGER,
233
+ started_at DATETIME,
234
+ completed_at DATETIME,
235
+ num_events INTEGER,
236
+ num_messages INTEGER,
237
+ step_metadata TEXT,
238
+ UNIQUE(session_id, step_id),
239
+ FOREIGN KEY(session_id) REFERENCES session_traces(session_id)
240
+ )
241
+ """,
242
+ """
243
+ CREATE TABLE IF NOT EXISTS events (
244
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
245
+ session_id VARCHAR NOT NULL,
246
+ timestep_id INTEGER,
247
+ event_type VARCHAR NOT NULL,
248
+ system_instance_id VARCHAR,
249
+ event_time FLOAT,
250
+ message_time INTEGER,
251
+ created_at DATETIME,
252
+ model_name VARCHAR,
253
+ provider VARCHAR,
254
+ input_tokens INTEGER,
255
+ output_tokens INTEGER,
256
+ total_tokens INTEGER,
257
+ cost_usd INTEGER,
258
+ latency_ms INTEGER,
259
+ span_id VARCHAR,
260
+ trace_id VARCHAR,
261
+ call_records TEXT,
262
+ reward FLOAT,
263
+ terminated BOOLEAN,
264
+ truncated BOOLEAN,
265
+ system_state_before TEXT,
266
+ system_state_after TEXT,
267
+ metadata TEXT,
268
+ event_metadata TEXT,
269
+ embedding VECTOR,
270
+ CHECK (event_type IN ('cais', 'environment', 'runtime')),
271
+ FOREIGN KEY(session_id) REFERENCES session_traces(session_id),
272
+ FOREIGN KEY(timestep_id) REFERENCES session_timesteps(id)
273
+ )
274
+ """,
275
+ """
276
+ CREATE TABLE IF NOT EXISTS messages (
277
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
278
+ session_id VARCHAR NOT NULL,
279
+ timestep_id INTEGER,
280
+ message_type VARCHAR NOT NULL,
281
+ content TEXT NOT NULL,
282
+ timestamp DATETIME,
283
+ event_time FLOAT,
284
+ message_time INTEGER,
285
+ metadata TEXT,
286
+ embedding VECTOR,
287
+ CHECK (message_type IN ('user', 'assistant', 'system', 'tool_use', 'tool_result')),
288
+ FOREIGN KEY(session_id) REFERENCES session_traces(session_id),
289
+ FOREIGN KEY(timestep_id) REFERENCES session_timesteps(id)
290
+ )
291
+ """,
292
+ """
293
+ CREATE TABLE IF NOT EXISTS outcome_rewards (
294
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
295
+ session_id VARCHAR NOT NULL,
296
+ total_reward INTEGER NOT NULL,
297
+ achievements_count INTEGER NOT NULL,
298
+ total_steps INTEGER NOT NULL,
299
+ created_at DATETIME NOT NULL,
300
+ reward_metadata TEXT,
301
+ FOREIGN KEY(session_id) REFERENCES session_traces(session_id)
302
+ )
303
+ """,
304
+ """
305
+ CREATE TABLE IF NOT EXISTS event_rewards (
306
+ id INTEGER PRIMARY KEY AUTOINCREMENT,
307
+ event_id INTEGER NOT NULL,
308
+ session_id VARCHAR NOT NULL,
309
+ message_id INTEGER,
310
+ turn_number INTEGER,
311
+ reward_value FLOAT NOT NULL,
312
+ reward_type VARCHAR,
313
+ "key" VARCHAR,
314
+ annotation TEXT,
315
+ source VARCHAR,
316
+ created_at DATETIME NOT NULL,
317
+ FOREIGN KEY(event_id) REFERENCES events(id),
318
+ FOREIGN KEY(session_id) REFERENCES session_traces(session_id),
319
+ FOREIGN KEY(message_id) REFERENCES messages(id)
320
+ )
321
+ """
322
+ )
323
+
324
+
325
# Secondary indexes executed by ``_ensure_schema`` after the table DDL.
# Every statement uses IF NOT EXISTS, so re-running the tuple against an
# already-initialised database is harmless.
_INDEX_DEFINITIONS: tuple[str, ...] = (
    # --- session_traces ---
    "CREATE INDEX IF NOT EXISTS idx_session_created ON session_traces (created_at)",
    "CREATE INDEX IF NOT EXISTS idx_session_experiment ON session_traces (experiment_id)",
    # --- session_timesteps ---
    # NOTE(review): the table already declares UNIQUE(session_id, step_id);
    # this index covers the same columns and may be redundant.
    "CREATE INDEX IF NOT EXISTS idx_timestep_session_step ON session_timesteps (session_id, step_id)",
    "CREATE INDEX IF NOT EXISTS idx_timestep_turn ON session_timesteps (turn_number)",
    # --- events ---
    "CREATE INDEX IF NOT EXISTS idx_event_session_step ON events (session_id, timestep_id)",
    "CREATE INDEX IF NOT EXISTS idx_event_type ON events (event_type)",
    "CREATE INDEX IF NOT EXISTS idx_event_created ON events (created_at)",
    "CREATE INDEX IF NOT EXISTS idx_event_model ON events (model_name)",
    "CREATE INDEX IF NOT EXISTS idx_event_trace ON events (trace_id)",
    # --- messages ---
    "CREATE INDEX IF NOT EXISTS idx_message_session_step ON messages (session_id, timestep_id)",
    "CREATE INDEX IF NOT EXISTS idx_message_type ON messages (message_type)",
    "CREATE INDEX IF NOT EXISTS idx_message_timestamp ON messages (timestamp)",
    # --- experiments / systems / versions ---
    "CREATE INDEX IF NOT EXISTS idx_experiment_created ON experiments (created_at)",
    "CREATE INDEX IF NOT EXISTS idx_experiment_name ON experiments (name)",
    "CREATE INDEX IF NOT EXISTS idx_system_name ON systems (name)",
    "CREATE INDEX IF NOT EXISTS idx_system_type ON systems (system_type)",
    "CREATE UNIQUE INDEX IF NOT EXISTS uq_system_version ON system_versions (system_id, version_number)",
    "CREATE INDEX IF NOT EXISTS idx_version_system ON system_versions (system_id)",
    "CREATE INDEX IF NOT EXISTS idx_version_created ON system_versions (created_at)",
    "CREATE UNIQUE INDEX IF NOT EXISTS uq_experiment_system ON experimental_systems (experiment_id, system_id)",
    # NOTE(review): same column list as uq_experiment_system directly above;
    # possibly redundant.
    "CREATE INDEX IF NOT EXISTS idx_experimental_system ON experimental_systems (experiment_id, system_id)",
    # --- outcome_rewards / event_rewards ---
    "CREATE INDEX IF NOT EXISTS idx_outcome_rewards_session ON outcome_rewards (session_id)",
    "CREATE INDEX IF NOT EXISTS idx_outcome_rewards_total ON outcome_rewards (total_reward)",
    "CREATE INDEX IF NOT EXISTS idx_event_rewards_session ON event_rewards (session_id)",
    "CREATE INDEX IF NOT EXISTS idx_event_rewards_event ON event_rewards (event_id)",
    "CREATE INDEX IF NOT EXISTS idx_event_rewards_type ON event_rewards (reward_type)",
    # "key" is quoted here, matching the quoted column name in the table DDL.
    'CREATE INDEX IF NOT EXISTS idx_event_rewards_key ON event_rewards ("key")',
)
354
+
355
+
356
+ class NativeLibsqlTraceManager(TraceStorage):
357
+ """Libsql-backed trace manager."""
358
+
359
+ def __init__(
360
+ self,
361
+ db_url: str | None = None,
362
+ *,
363
+ auth_token: str | None = None,
364
+ ):
365
+ self._config_auth_token = auth_token
366
+ self._target = _resolve_connection_target(db_url, auth_token)
367
+ self._conn: LibsqlConnection | None = None
368
+ self._conn_lock = asyncio.Lock()
369
+ self._op_lock = asyncio.Lock()
370
+ self._initialized = False
371
+
372
+ def _open_connection(self) -> LibsqlConnection:
373
+ """Open a libsql connection for the resolved target."""
374
+ kwargs: dict[str, Any] = {}
375
+ if self._target.sync_url and self._target.sync_url.startswith("libsql://"):
376
+ kwargs["sync_url"] = self._target.sync_url
377
+ if self._target.auth_token:
378
+ kwargs["auth_token"] = self._target.auth_token
379
+ # Disable automatic background sync; ReplicaSync drives this explicitly.
380
+ kwargs.setdefault("sync_interval", 0)
381
+ logger.debug("Opening libsql connection to %s", self._target.database)
382
+ return _libsql_connect(self._target.database, **kwargs)
383
+
384
+ async def initialize(self):
385
+ """Initialise the backend."""
386
+ async with self._conn_lock:
387
+ if self._initialized:
388
+ return
389
+
390
+ # Fast-fail preflight: if using remote endpoint or local sqld, check health
391
+ # Skip health check for plain SQLite files (sync_url is None)
392
+ if self._target.sync_url:
393
+ try:
394
+ parsed = urlparse(self._target.database or "")
395
+ # Check for local sqld: http://, https://, or libsql://
396
+ if parsed.scheme in ("http", "https", "libsql"):
397
+ host_port = parsed.netloc or ""
398
+ host = (host_port.split(":", 1)[0] or "").strip().lower()
399
+ if host in {"127.0.0.1", "localhost"} and host_port:
400
+ # For http:// URLs, the port should already be the HTTP API port
401
+ # For libsql:// URLs, we need to calculate health check port
402
+ if ":" in host_port:
403
+ port = int(host_port.split(":", 1)[1])
404
+ if parsed.scheme == "libsql":
405
+ # libsql:// uses Hrana port, health check is on HTTP API port (Hrana + 1)
406
+ health_url = f"http://{host}:{port + 1}/health"
407
+ else:
408
+ # http:// already points to HTTP API port
409
+ health_url = f"http://{host}:{port}/health"
410
+ else:
411
+ health_url = f"http://{host_port}/health"
412
+ try:
413
+ async with httpx.AsyncClient(timeout=httpx.Timeout(1.0)) as client:
414
+ resp = await client.get(health_url)
415
+ if resp.status_code != 200:
416
+ raise RuntimeError(
417
+ f"Tracing backend unhealthy at {health_url} (status={resp.status_code})"
418
+ )
419
+ except Exception as exc: # pragma: no cover - network env dependent
420
+ raise RuntimeError(
421
+ f"Tracing backend not reachable at {health_url}. "
422
+ f"Start sqld with both ports: sqld --db-path <path> --hrana-listen-addr {host}:HRANA_PORT --http-listen-addr {host}:HTTP_PORT "
423
+ f"or disable tracing (TASKAPP_TRACING_ENABLED=0)."
424
+ ) from exc
425
+ except Exception:
426
+ # Propagate any preflight failure to abort early
427
+ raise
428
+
429
+ # Establish a libsql connection for future native operations.
430
+ self._conn = self._open_connection()
431
+ self._ensure_schema()
432
+ self._initialized = True
433
+
434
+ async def close(self):
435
+ """Close the libsql connection."""
436
+ async with self._conn_lock:
437
+ if self._conn:
438
+ logger.debug("Closing libsql connection to %s", self._target.database)
439
+ self._conn.close()
440
+ self._conn = None
441
+ self._initialized = False
442
+
443
+ # ------------------------------------------------------------------
444
+ # Delegated operations (to be swapped with native libsql versions).
445
+ # ------------------------------------------------------------------
446
+
447
+ async def insert_session_trace(self, trace: SessionTrace) -> str:
448
+ await self.initialize()
449
+
450
+ import logging as _logging
451
+ _logger = _logging.getLogger(__name__)
452
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace START: session_id={trace.session_id}, {len(trace.markov_blanket_message_history)} messages")
453
+
454
+ session_exists = await self._session_exists(trace.session_id)
455
+ _logger.info(f"[TRACE_DEBUG] Session exists: {session_exists}")
456
+
457
+ step_id_map: dict[str, int] = {}
458
+
459
+ if session_exists:
460
+ _logger.warning(f"[TRACE_DEBUG] Session {trace.session_id} already exists, skipping events/timesteps, only updating messages!")
461
+ # Don't return early - we need to save messages!
462
+ # Just update metadata
463
+ async with self._op_lock:
464
+ conn = self._conn
465
+ assert conn is not None
466
+ conn.execute(
467
+ "UPDATE session_traces SET metadata = ? WHERE session_id = ?",
468
+ (_json_dumps(trace.metadata or {}), trace.session_id),
469
+ )
470
+ conn.commit()
471
+ # Skip events and timesteps to ensure idempotency
472
+ else:
473
+ created_at = trace.created_at or datetime.now(UTC)
474
+
475
+ async with self._op_lock:
476
+ conn = self._conn
477
+ assert conn is not None
478
+ conn.execute(
479
+ """
480
+ INSERT INTO session_traces (
481
+ session_id,
482
+ created_at,
483
+ num_timesteps,
484
+ num_events,
485
+ num_messages,
486
+ metadata
487
+ )
488
+ VALUES (?, ?, 0, 0, 0, ?)
489
+ """,
490
+ (
491
+ trace.session_id,
492
+ created_at.isoformat(),
493
+ _json_dumps(trace.metadata or {}),
494
+ ),
495
+ )
496
+ conn.commit()
497
+ _logger.info("[TRACE_DEBUG] Session row inserted")
498
+
499
+ # Only insert timesteps and events if this is a new session
500
+ for step in trace.session_time_steps:
501
+ step_db_id = await self.ensure_timestep(
502
+ trace.session_id,
503
+ step_id=step.step_id,
504
+ step_index=step.step_index,
505
+ turn_number=step.turn_number,
506
+ started_at=step.timestamp,
507
+ completed_at=step.completed_at,
508
+ metadata=step.step_metadata or {},
509
+ )
510
+ step_id_map[step.step_id] = step_db_id
511
+
512
+ for event in trace.event_history:
513
+ step_ref = None
514
+ metadata = event.metadata or {}
515
+ if isinstance(metadata, dict):
516
+ step_ref = metadata.get("step_id")
517
+ timestep_db_id = step_id_map.get(step_ref) if step_ref else None
518
+ await self.insert_event_row(
519
+ trace.session_id,
520
+ timestep_db_id=timestep_db_id,
521
+ event=event,
522
+ metadata_override=event.metadata or {},
523
+ )
524
+
525
+ import logging as _logging
526
+ _logger = _logging.getLogger(__name__)
527
+ _logger.info(f"[TRACE_DEBUG] insert_session_trace: saving {len(trace.markov_blanket_message_history)} messages (session_exists={session_exists})")
528
+
529
+ # Only insert messages if this is a new session (for idempotency)
530
+ if not session_exists:
531
+ for idx, msg in enumerate(trace.markov_blanket_message_history):
532
+ metadata = dict(getattr(msg, "metadata", {}) or {})
533
+ step_ref = metadata.get("step_id")
534
+ content_value = msg.content
535
+ if isinstance(msg.content, SessionMessageContent):
536
+ if msg.content.json_payload:
537
+ metadata.setdefault("json_payload", msg.content.json_payload)
538
+ content_value = msg.content.json_payload
539
+ else:
540
+ content_value = msg.content.as_text()
541
+ if msg.content.text:
542
+ metadata.setdefault("text", msg.content.text)
543
+ elif not isinstance(content_value, str):
544
+ try:
545
+ content_value = json.dumps(content_value, ensure_ascii=False)
546
+ except (TypeError, ValueError):
547
+ content_value = str(content_value)
548
+
549
+ _logger.info(f"[TRACE_DEBUG] Message {idx+1}: type={msg.message_type}, content_len={len(str(content_value))}")
550
+
551
+ try:
552
+ await self.insert_message_row(
553
+ trace.session_id,
554
+ timestep_db_id=step_id_map.get(step_ref) if step_ref else None,
555
+ message_type=msg.message_type,
556
+ content=content_value,
557
+ event_time=msg.time_record.event_time,
558
+ message_time=msg.time_record.message_time,
559
+ metadata=metadata,
560
+ )
561
+ _logger.info(f"[TRACE_DEBUG] Message {idx+1}: saved successfully")
562
+ except Exception as exc:
563
+ _logger.error(f"[TRACE_DEBUG] Message {idx+1}: FAILED TO SAVE: {exc}", exc_info=True)
564
+ raise
565
+ else:
566
+ _logger.info("[TRACE_DEBUG] Skipping message insertion for existing session (idempotency)")
567
+
568
+ async with self._op_lock:
569
+ conn = self._conn
570
+ assert conn is not None
571
+ conn.execute(
572
+ "UPDATE session_traces SET num_timesteps = ?, num_events = ?, num_messages = ?, metadata = ? WHERE session_id = ?",
573
+ (
574
+ len(trace.session_time_steps),
575
+ len(trace.event_history),
576
+ len(trace.markov_blanket_message_history),
577
+ _json_dumps(trace.metadata or {}),
578
+ trace.session_id,
579
+ ),
580
+ )
581
+ conn.commit()
582
+
583
+ return trace.session_id
584
+
585
+ async def get_session_trace(self, session_id: str) -> dict[str, Any] | None:
586
+ await self.initialize()
587
+
588
+ async with self._op_lock:
589
+ conn = self._conn
590
+ assert conn is not None
591
+
592
+ session_cursor = conn.execute(
593
+ """
594
+ SELECT session_id,
595
+ created_at,
596
+ num_timesteps,
597
+ num_events,
598
+ num_messages,
599
+ metadata
600
+ FROM session_traces
601
+ WHERE session_id = ?
602
+ """,
603
+ (session_id,),
604
+ )
605
+ session_row = session_cursor.fetchone()
606
+ session_cursor.close()
607
+
608
+ if not session_row:
609
+ return None
610
+
611
+ session_columns = ["session_id", "created_at", "num_timesteps", "num_events", "num_messages", "metadata"]
612
+ session_data = dict(zip(session_columns, session_row, strict=True))
613
+
614
+ timestep_cursor = conn.execute(
615
+ """
616
+ SELECT step_id,
617
+ step_index,
618
+ turn_number,
619
+ started_at,
620
+ completed_at,
621
+ step_metadata
622
+ FROM session_timesteps
623
+ WHERE session_id = ?
624
+ ORDER BY step_index ASC
625
+ """,
626
+ (session_id,),
627
+ )
628
+ timestep_rows = timestep_cursor.fetchall()
629
+ timestep_cursor.close()
630
+
631
+ return {
632
+ "session_id": session_data["session_id"],
633
+ "created_at": _maybe_datetime(session_data["created_at"]),
634
+ "num_timesteps": session_data["num_timesteps"],
635
+ "num_events": session_data["num_events"],
636
+ "num_messages": session_data["num_messages"],
637
+ "metadata": _load_json(session_data["metadata"]),
638
+ "timesteps": [
639
+ {
640
+ "step_id": row[0],
641
+ "step_index": row[1],
642
+ "turn_number": row[2],
643
+ "started_at": _maybe_datetime(row[3]),
644
+ "completed_at": _maybe_datetime(row[4]),
645
+ "metadata": _load_json(row[5]),
646
+ }
647
+ for row in timestep_rows
648
+ ],
649
+ }
650
+
651
+ async def _session_exists(self, session_id: str) -> bool:
652
+ await self.initialize()
653
+ async with self._op_lock:
654
+ conn = self._conn
655
+ assert conn is not None
656
+ cursor = conn.execute(
657
+ "SELECT 1 FROM session_traces WHERE session_id = ?", (session_id,)
658
+ )
659
+ row = cursor.fetchone()
660
+ cursor.close()
661
+ return row is not None
662
+
663
+ @staticmethod
664
+ def _normalise_params(params: dict[str, Any] | None) -> dict[str, Any]:
665
+ if not params:
666
+ return {}
667
+ normalised: dict[str, Any] = {}
668
+ for key, value in params.items():
669
+ if isinstance(value, datetime):
670
+ normalised[key] = value.isoformat()
671
+ else:
672
+ normalised[key] = value
673
+ return normalised
674
+
675
+ @staticmethod
676
+ def _prepare_query_params(query: str, params: dict[str, Any] | list[Any] | tuple[Any, ...]) -> tuple[str, tuple[Any, ...]]:
677
+ if isinstance(params, dict):
678
+ keys: list[str] = []
679
+
680
+ def _replace(match: re.Match[str]) -> str:
681
+ key = match.group(1)
682
+ keys.append(key)
683
+ return "?"
684
+
685
+ new_query = re.sub(r":([a-zA-Z_][a-zA-Z0-9_]*)", _replace, query)
686
+ if not keys:
687
+ raise ValueError("No named parameters found in query for provided mapping")
688
+ values = tuple(params[key] for key in keys)
689
+ return new_query, values
690
+ if isinstance(params, list | tuple):
691
+ return query, tuple(params)
692
+ raise TypeError("Unsupported parameter type for query execution")
693
+
694
+ def _ensure_schema(self) -> None:
695
+ if not self._conn:
696
+ raise RuntimeError("Connection not initialised")
697
+
698
+ for ddl in _TABLE_DEFINITIONS:
699
+ self._conn.execute(ddl)
700
+ for ddl in _INDEX_DEFINITIONS:
701
+ self._conn.execute(ddl)
702
+ for view_sql in analytics_views.values():
703
+ self._conn.execute(view_sql)
704
+ self._conn.commit()
705
+
706
+ async def query_traces(self, query: str, params: dict[str, Any] | None = None) -> Any:
707
+ await self.initialize()
708
+
709
+ async with self._op_lock:
710
+ conn = self._conn
711
+ assert conn is not None
712
+ normalised = self._normalise_params(params)
713
+ if normalised:
714
+ prepared_query, prepared_params = self._prepare_query_params(query, normalised)
715
+ cursor = conn.execute(prepared_query, prepared_params)
716
+ else:
717
+ cursor = conn.execute(query)
718
+ try:
719
+ description = cursor.description or []
720
+ columns = [col[0] for col in description]
721
+ rows = cursor.fetchall()
722
+ finally:
723
+ cursor.close()
724
+
725
+ if not rows:
726
+ if pd is not None:
727
+ return pd.DataFrame(columns=list(columns))
728
+ return []
729
+
730
+ records = [dict(zip(columns, row, strict=True)) for row in rows]
731
+ if pd is not None:
732
+ return pd.DataFrame(records)
733
+ return records
734
+
735
+ async def get_model_usage(
736
+ self,
737
+ start_date=None,
738
+ end_date=None,
739
+ model_name=None,
740
+ ) -> Any:
741
+ query = """
742
+ SELECT * FROM model_usage_stats
743
+ WHERE 1=1
744
+ """
745
+ params: dict[str, Any] = {}
746
+ if start_date:
747
+ params["start_date"] = start_date
748
+ query += " AND last_used >= :start_date"
749
+ if end_date:
750
+ params["end_date"] = end_date
751
+ query += " AND first_used <= :end_date"
752
+ if model_name:
753
+ params["model_name"] = model_name
754
+ query += " AND model_name = :model_name"
755
+ query += " ORDER BY usage_count DESC"
756
+ return await self.query_traces(query, params)
757
+
758
+ async def delete_session(self, session_id: str) -> bool:
759
+ await self.initialize()
760
+
761
+ async with self._op_lock:
762
+ conn = self._conn
763
+ assert conn is not None
764
+
765
+ cursor = conn.execute(
766
+ "SELECT 1 FROM session_traces WHERE session_id = ?", (session_id,)
767
+ )
768
+ exists = cursor.fetchone() is not None
769
+ cursor.close()
770
+ if not exists:
771
+ return False
772
+
773
+ conn.execute("DELETE FROM event_rewards WHERE session_id = ?", (session_id,))
774
+ conn.execute("DELETE FROM outcome_rewards WHERE session_id = ?", (session_id,))
775
+ conn.execute("DELETE FROM messages WHERE session_id = ?", (session_id,))
776
+ conn.execute("DELETE FROM events WHERE session_id = ?", (session_id,))
777
+ conn.execute("DELETE FROM session_timesteps WHERE session_id = ?", (session_id,))
778
+ conn.execute("DELETE FROM session_traces WHERE session_id = ?", (session_id,))
779
+ conn.commit()
780
+ return True
781
+
782
+ # Experiment helpers -------------------------------------------------
783
+ async def create_experiment(
784
+ self,
785
+ experiment_id: str,
786
+ name: str,
787
+ description: str | None = None,
788
+ configuration: dict[str, Any] | None = None,
789
+ ) -> str:
790
+ await self.initialize()
791
+
792
+ async with self._op_lock:
793
+ conn = self._conn
794
+ assert conn is not None
795
+ conn.execute(
796
+ """
797
+ INSERT INTO experiments (experiment_id, name, description, configuration)
798
+ VALUES (?, ?, ?, ?)
799
+ ON CONFLICT(experiment_id) DO UPDATE SET
800
+ name = excluded.name,
801
+ description = excluded.description,
802
+ configuration = excluded.configuration
803
+ """,
804
+ (
805
+ experiment_id,
806
+ name,
807
+ description,
808
+ _json_dumps(configuration or {}),
809
+ ),
810
+ )
811
+ conn.commit()
812
+ return experiment_id
813
+
814
+ async def link_session_to_experiment(self, session_id: str, experiment_id: str):
815
+ await self.initialize()
816
+
817
+ async with self._op_lock:
818
+ conn = self._conn
819
+ assert conn is not None
820
+ conn.execute(
821
+ "UPDATE session_traces SET experiment_id = ? WHERE session_id = ?",
822
+ (experiment_id, session_id),
823
+ )
824
+ conn.commit()
825
+
826
+ async def get_sessions_by_experiment(
827
+ self, experiment_id: str, limit: int | None = None
828
+ ) -> list[dict[str, Any]]:
829
+ await self.initialize()
830
+
831
+ sql = """
832
+ SELECT session_id,
833
+ created_at,
834
+ num_timesteps,
835
+ num_events,
836
+ num_messages,
837
+ metadata
838
+ FROM session_traces
839
+ WHERE experiment_id = ?
840
+ ORDER BY created_at DESC
841
+ """
842
+ params: list[Any] = [experiment_id]
843
+ if limit is not None:
844
+ sql += " LIMIT ?"
845
+ params.append(limit)
846
+
847
+ async with self._op_lock:
848
+ conn = self._conn
849
+ assert conn is not None
850
+ cursor = conn.execute(sql, params)
851
+ rows = cursor.fetchall()
852
+ cursor.close()
853
+
854
+ return [
855
+ {
856
+ "session_id": row[0],
857
+ "created_at": _maybe_datetime(row[1]),
858
+ "num_timesteps": row[2],
859
+ "num_events": row[3],
860
+ "num_messages": row[4],
861
+ "metadata": _load_json(row[5]),
862
+ }
863
+ for row in rows
864
+ ]
865
+
866
+ async def batch_insert_sessions(
867
+ self, traces: list[SessionTrace], batch_size: int | None = None
868
+ ) -> list[str]:
869
+ batch_size = batch_size or CONFIG.batch_size
870
+ inserted: list[str] = []
871
+
872
+ for i in range(0, len(traces), batch_size):
873
+ chunk = traces[i : i + batch_size]
874
+ for trace in chunk:
875
+ session_id = await self.insert_session_trace(trace)
876
+ inserted.append(session_id)
877
+ return inserted
878
+
879
+ # Incremental helpers -----------------------------------------------
880
+ async def ensure_session(
881
+ self,
882
+ session_id: str,
883
+ *,
884
+ created_at=None,
885
+ metadata=None,
886
+ ) -> None:
887
+ await self.initialize()
888
+
889
+ created_at_val = (created_at or datetime.now(UTC)).isoformat()
890
+ metadata_json = _json_dumps(metadata or {})
891
+
892
+ async with self._op_lock:
893
+ conn = self._conn
894
+
895
+ assert conn is not None
896
+ conn.execute(
897
+ """
898
+ INSERT INTO session_traces (
899
+ session_id, created_at, num_timesteps, num_events, num_messages, metadata
900
+ )
901
+ VALUES (?, ?, 0, 0, 0, ?)
902
+ ON CONFLICT(session_id) DO NOTHING
903
+ """,
904
+ (session_id, created_at_val, metadata_json),
905
+ )
906
+ conn.commit()
907
+
908
+ async def ensure_timestep(
909
+ self,
910
+ session_id: str,
911
+ *,
912
+ step_id: str,
913
+ step_index: int,
914
+ turn_number: int | None = None,
915
+ started_at=None,
916
+ completed_at=None,
917
+ metadata=None,
918
+ ) -> int:
919
+ await self.initialize()
920
+
921
+ started_at_val = (started_at or datetime.now(UTC)).isoformat()
922
+ completed_at_val = completed_at.isoformat() if completed_at else None
923
+ metadata_json = _json_dumps(metadata or {})
924
+
925
+ async with self._op_lock:
926
+ conn = self._conn
927
+
928
+ assert conn is not None
929
+ cur = conn.execute(
930
+ """
931
+ SELECT id FROM session_timesteps
932
+ WHERE session_id = ? AND step_id = ?
933
+ """,
934
+ (session_id, step_id),
935
+ )
936
+ row = cur.fetchone()
937
+ if row:
938
+ return int(row[0])
939
+
940
+ cur = conn.execute(
941
+ """
942
+ INSERT INTO session_timesteps (
943
+ session_id,
944
+ step_id,
945
+ step_index,
946
+ turn_number,
947
+ started_at,
948
+ completed_at,
949
+ num_events,
950
+ num_messages,
951
+ step_metadata
952
+ )
953
+ VALUES (?, ?, ?, ?, ?, ?, 0, 0, ?)
954
+ """,
955
+ (
956
+ session_id,
957
+ step_id,
958
+ step_index,
959
+ turn_number,
960
+ started_at_val,
961
+ completed_at_val,
962
+ metadata_json,
963
+ ),
964
+ )
965
+ timestep_id = int(cur.lastrowid)
966
+ conn.execute(
967
+ """
968
+ UPDATE session_traces
969
+ SET num_timesteps = num_timesteps + 1
970
+ WHERE session_id = ?
971
+ """,
972
+ (session_id,),
973
+ )
974
+ conn.commit()
975
+ return timestep_id
976
+
977
+ async def insert_event_row(
978
+ self,
979
+ session_id: str,
980
+ *,
981
+ timestep_db_id: int | None,
982
+ event: Any,
983
+ metadata_override: dict[str, Any] | None = None,
984
+ ) -> int:
985
+ await self.initialize()
986
+
987
+ if not isinstance(event, EnvironmentEvent | LMCAISEvent | RuntimeEvent):
988
+ raise TypeError(f"Unsupported event type for native manager: {type(event)!r}")
989
+
990
+ metadata_json = metadata_override or event.metadata or {}
991
+ event_extra_metadata = getattr(event, "event_metadata", None)
992
+ system_state_before = getattr(event, "system_state_before", None)
993
+ system_state_after = getattr(event, "system_state_after", None)
994
+
995
+ payload: dict[str, Any] = {
996
+ "session_id": session_id,
997
+ "timestep_id": timestep_db_id,
998
+ "system_instance_id": event.system_instance_id,
999
+ "event_time": event.time_record.event_time,
1000
+ "message_time": event.time_record.message_time,
1001
+ "metadata": metadata_json,
1002
+ "event_metadata": event_extra_metadata,
1003
+ "system_state_before": system_state_before,
1004
+ "system_state_after": system_state_after,
1005
+ }
1006
+
1007
+ if isinstance(event, LMCAISEvent):
1008
+ call_records = None
1009
+ if getattr(event, "call_records", None):
1010
+ # Handle both dataclass instances and dicts (from deserialization)
1011
+ call_records = [
1012
+ asdict(record) if not isinstance(record, dict) else record
1013
+ for record in event.call_records
1014
+ ]
1015
+ payload.update(
1016
+ {
1017
+ "event_type": "cais",
1018
+ "model_name": event.model_name,
1019
+ "provider": event.provider,
1020
+ "input_tokens": event.input_tokens,
1021
+ "output_tokens": event.output_tokens,
1022
+ "total_tokens": event.total_tokens,
1023
+ "cost_usd": int(event.cost_usd * 100) if event.cost_usd is not None else None,
1024
+ "latency_ms": event.latency_ms,
1025
+ "span_id": event.span_id,
1026
+ "trace_id": event.trace_id,
1027
+ "call_records": call_records,
1028
+ }
1029
+ )
1030
+ elif isinstance(event, EnvironmentEvent):
1031
+ payload.update(
1032
+ {
1033
+ "event_type": "environment",
1034
+ "reward": event.reward,
1035
+ "terminated": event.terminated,
1036
+ "truncated": event.truncated,
1037
+ }
1038
+ )
1039
+ elif isinstance(event, RuntimeEvent):
1040
+ payload.update(
1041
+ {
1042
+ "event_type": "runtime",
1043
+ "metadata": {**(event.metadata or {}), "actions": event.actions},
1044
+ }
1045
+ )
1046
+
1047
+ async with self._op_lock:
1048
+ conn = self._conn
1049
+
1050
+ assert conn is not None
1051
+ cur = conn.execute(
1052
+ """
1053
+ INSERT INTO events (
1054
+ session_id,
1055
+ timestep_id,
1056
+ event_type,
1057
+ system_instance_id,
1058
+ event_time,
1059
+ message_time,
1060
+ model_name,
1061
+ provider,
1062
+ input_tokens,
1063
+ output_tokens,
1064
+ total_tokens,
1065
+ cost_usd,
1066
+ latency_ms,
1067
+ span_id,
1068
+ trace_id,
1069
+ call_records,
1070
+ reward,
1071
+ terminated,
1072
+ truncated,
1073
+ system_state_before,
1074
+ system_state_after,
1075
+ metadata,
1076
+ event_metadata
1077
+ )
1078
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1079
+ """,
1080
+ (
1081
+ payload["session_id"],
1082
+ payload["timestep_id"],
1083
+ payload.get("event_type"),
1084
+ payload["system_instance_id"],
1085
+ payload["event_time"],
1086
+ payload["message_time"],
1087
+ payload.get("model_name"),
1088
+ payload.get("provider"),
1089
+ payload.get("input_tokens"),
1090
+ payload.get("output_tokens"),
1091
+ payload.get("total_tokens"),
1092
+ payload.get("cost_usd"),
1093
+ payload.get("latency_ms"),
1094
+ payload.get("span_id"),
1095
+ payload.get("trace_id"),
1096
+ _json_dumps(payload.get("call_records")),
1097
+ payload.get("reward"),
1098
+ payload.get("terminated"),
1099
+ payload.get("truncated"),
1100
+ _json_dumps(payload.get("system_state_before")),
1101
+ _json_dumps(payload.get("system_state_after")),
1102
+ _json_dumps(payload.get("metadata")),
1103
+ _json_dumps(payload.get("event_metadata")),
1104
+ ),
1105
+ )
1106
+ event_id = int(cur.lastrowid)
1107
+ conn.execute(
1108
+ """
1109
+ UPDATE session_traces
1110
+ SET num_events = num_events + 1
1111
+ WHERE session_id = ?
1112
+ """,
1113
+ (session_id,),
1114
+ )
1115
+ if timestep_db_id is not None:
1116
+ conn.execute(
1117
+ """
1118
+ UPDATE session_timesteps
1119
+ SET num_events = num_events + 1
1120
+ WHERE id = ?
1121
+ """,
1122
+ (timestep_db_id,),
1123
+ )
1124
+ conn.commit()
1125
+ return event_id
1126
+
1127
+ async def insert_message_row(
1128
+ self,
1129
+ session_id: str,
1130
+ *,
1131
+ timestep_db_id: int | None,
1132
+ message_type: str,
1133
+ content: Any,
1134
+ event_time: float | None = None,
1135
+ message_time: int | None = None,
1136
+ metadata: dict[str, Any] | None = None,
1137
+ ) -> int:
1138
+ await self.initialize()
1139
+
1140
+ metadata_payload = dict(metadata or {})
1141
+ if isinstance(content, SessionMessageContent):
1142
+ if content.json_payload:
1143
+ metadata_payload.setdefault("json_payload", content.json_payload)
1144
+ content_value = content.json_payload
1145
+ else:
1146
+ content_value = content.as_text()
1147
+ if content.text:
1148
+ metadata_payload.setdefault("text", content.text)
1149
+ else:
1150
+ content_value = content
1151
+ if not isinstance(content_value, str):
1152
+ try:
1153
+ content_value = json.dumps(content_value, ensure_ascii=False)
1154
+ except (TypeError, ValueError):
1155
+ content_value = str(content_value)
1156
+
1157
+ async with self._op_lock:
1158
+ conn = self._conn
1159
+
1160
+ assert conn is not None
1161
+ cur = conn.execute(
1162
+ """
1163
+ INSERT INTO messages (
1164
+ session_id,
1165
+ timestep_id,
1166
+ message_type,
1167
+ content,
1168
+ event_time,
1169
+ message_time,
1170
+ metadata
1171
+ )
1172
+ VALUES (?, ?, ?, ?, ?, ?, ?)
1173
+ """,
1174
+ (
1175
+ session_id,
1176
+ timestep_db_id,
1177
+ message_type,
1178
+ content_value,
1179
+ event_time,
1180
+ message_time,
1181
+ _json_dumps(metadata_payload),
1182
+ ),
1183
+ )
1184
+ message_id = int(cur.lastrowid)
1185
+ conn.execute(
1186
+ """
1187
+ UPDATE session_traces
1188
+ SET num_messages = num_messages + 1
1189
+ WHERE session_id = ?
1190
+ """,
1191
+ (session_id,),
1192
+ )
1193
+ if timestep_db_id is not None:
1194
+ conn.execute(
1195
+ """
1196
+ UPDATE session_timesteps
1197
+ SET num_messages = num_messages + 1
1198
+ WHERE id = ?
1199
+ """,
1200
+ (timestep_db_id,),
1201
+ )
1202
+ conn.commit()
1203
+ return message_id
1204
+
1205
+ async def insert_outcome_reward(
1206
+ self,
1207
+ session_id: str,
1208
+ *,
1209
+ total_reward: int,
1210
+ achievements_count: int,
1211
+ total_steps: int,
1212
+ reward_metadata: dict | None = None,
1213
+ ) -> int:
1214
+ await self.initialize()
1215
+
1216
+ async with self._op_lock:
1217
+ conn = self._conn
1218
+
1219
+ assert conn is not None
1220
+ cur = conn.execute(
1221
+ """
1222
+ INSERT INTO outcome_rewards (
1223
+ session_id,
1224
+ total_reward,
1225
+ achievements_count,
1226
+ total_steps,
1227
+ created_at,
1228
+ reward_metadata
1229
+ )
1230
+ VALUES (?, ?, ?, ?, ?, ?)
1231
+ """,
1232
+ (
1233
+ session_id,
1234
+ total_reward,
1235
+ achievements_count,
1236
+ total_steps,
1237
+ datetime.now(UTC).isoformat(),
1238
+ _json_dumps(reward_metadata),
1239
+ ),
1240
+ )
1241
+ conn.commit()
1242
+ return int(cur.lastrowid)
1243
+
1244
+ async def insert_event_reward(
1245
+ self,
1246
+ session_id: str,
1247
+ *,
1248
+ event_id: int,
1249
+ message_id: int | None = None,
1250
+ turn_number: int | None = None,
1251
+ reward_value: float = 0.0,
1252
+ reward_type: str | None = None,
1253
+ key: str | None = None,
1254
+ annotation: dict[str, Any] | None = None,
1255
+ source: str | None = None,
1256
+ ) -> int:
1257
+ await self.initialize()
1258
+
1259
+ async with self._op_lock:
1260
+ conn = self._conn
1261
+
1262
+ assert conn is not None
1263
+ cur = conn.execute(
1264
+ """
1265
+ INSERT INTO event_rewards (
1266
+ event_id,
1267
+ session_id,
1268
+ message_id,
1269
+ turn_number,
1270
+ reward_value,
1271
+ reward_type,
1272
+ key,
1273
+ annotation,
1274
+ source,
1275
+ created_at
1276
+ )
1277
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
1278
+ """,
1279
+ (
1280
+ event_id,
1281
+ session_id,
1282
+ message_id,
1283
+ turn_number,
1284
+ reward_value,
1285
+ reward_type,
1286
+ key,
1287
+ _json_dumps(annotation),
1288
+ source,
1289
+ datetime.now(UTC).isoformat(),
1290
+ ),
1291
+ )
1292
+ conn.commit()
1293
+ return int(cur.lastrowid)