synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (890) hide show
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -1,14 +1,17 @@
1
- from __future__ import annotations
2
1
  """Main SessionTracer class for tracing v3."""
3
2
 
3
+ from __future__ import annotations
4
+
4
5
  import asyncio
6
+ import json
5
7
  from contextlib import asynccontextmanager
6
- from datetime import datetime
8
+ from datetime import UTC, datetime
7
9
  from typing import Any
8
10
 
9
11
  from .abstractions import (
10
12
  BaseEvent,
11
13
  SessionEventMarkovBlanketMessage,
14
+ SessionMessageContent,
12
15
  SessionTimeStep,
13
16
  SessionTrace,
14
17
  TimeRecord,
@@ -16,7 +19,9 @@ from .abstractions import (
16
19
  from .config import CONFIG
17
20
  from .decorators import set_session_id, set_session_tracer, set_turn_number
18
21
  from .hooks import GLOBAL_HOOKS, HookManager
19
- from .turso.manager import AsyncSQLTraceManager
22
+ from .storage.base import TraceStorage
23
+ from .storage.config import StorageConfig
24
+ from .storage.factory import create_storage
20
25
  from .utils import generate_session_id
21
26
 
22
27
 
@@ -28,6 +33,8 @@ class SessionTracer:
28
33
  hooks: HookManager | None = None,
29
34
  db_url: str | None = None,
30
35
  auto_save: bool = True,
36
+ storage: TraceStorage | None = None,
37
+ storage_config: StorageConfig | None = None,
31
38
  ):
32
39
  """Initialize session tracer.
33
40
 
@@ -40,7 +47,8 @@ class SessionTracer:
40
47
  self._current_trace: SessionTrace | None = None
41
48
  self._lock = asyncio.Lock()
42
49
  self.db_url = db_url or CONFIG.db_url
43
- self.db: AsyncSQLTraceManager | None = None
50
+ self._storage_config = storage_config
51
+ self.db: TraceStorage | None = storage
44
52
  self.auto_save = auto_save
45
53
  self._current_step: SessionTimeStep | None = None
46
54
 
@@ -57,7 +65,8 @@ class SessionTracer:
57
65
  async def initialize(self):
58
66
  """Initialize the database connection."""
59
67
  if self.db is None:
60
- self.db = AsyncSQLTraceManager(self.db_url)
68
+ config = self._storage_config or StorageConfig(connection_string=self.db_url)
69
+ self.db = create_storage(config)
61
70
  await self.db.initialize()
62
71
 
63
72
  async def start_session(
@@ -97,7 +106,7 @@ class SessionTracer:
97
106
 
98
107
  self._current_trace = SessionTrace(
99
108
  session_id=session_id,
100
- created_at=datetime.utcnow(),
109
+ created_at=datetime.now(UTC),
101
110
  session_time_steps=[],
102
111
  event_history=[],
103
112
  markov_blanket_message_history=[],
@@ -110,7 +119,9 @@ class SessionTracer:
110
119
 
111
120
  # Ensure session row exists for incremental writes
112
121
  if self.db:
113
- await self.db.ensure_session(session_id, created_at=self._current_trace.created_at, metadata=metadata or {})
122
+ await self.db.ensure_session(
123
+ session_id, created_at=self._current_trace.created_at, metadata=metadata or {}
124
+ )
114
125
 
115
126
  # Trigger hooks
116
127
  await self.hooks.trigger(
@@ -141,7 +152,7 @@ class SessionTracer:
141
152
  step = SessionTimeStep(
142
153
  step_id=step_id,
143
154
  step_index=len(self._current_trace.session_time_steps),
144
- timestamp=datetime.utcnow(),
155
+ timestamp=datetime.now(UTC),
145
156
  turn_number=turn_number,
146
157
  step_metadata=metadata or {},
147
158
  )
@@ -186,7 +197,7 @@ class SessionTracer:
186
197
  step = self._current_step
187
198
 
188
199
  if step and step.completed_at is None:
189
- step.completed_at = datetime.utcnow()
200
+ step.completed_at = datetime.now(UTC)
190
201
 
191
202
  # Trigger hooks
192
203
  await self.hooks.trigger(
@@ -259,7 +270,7 @@ class SessionTracer:
259
270
 
260
271
  async def record_message(
261
272
  self,
262
- content: str,
273
+ content: Any,
263
274
  message_type: str,
264
275
  event_time: float | None = None,
265
276
  message_time: int | None = None,
@@ -277,11 +288,13 @@ class SessionTracer:
277
288
  if self._current_trace is None:
278
289
  raise RuntimeError("No active session")
279
290
 
291
+ normalised_content, content_str = self._normalise_message_content(content)
292
+
280
293
  msg = SessionEventMarkovBlanketMessage(
281
- content=content,
294
+ content=normalised_content,
282
295
  message_type=message_type,
283
296
  time_record=TimeRecord(
284
- event_time=event_time or datetime.utcnow().timestamp(), message_time=message_time
297
+ event_time=event_time or datetime.now(UTC).timestamp(), message_time=message_time
285
298
  ),
286
299
  metadata=metadata or {},
287
300
  )
@@ -315,7 +328,7 @@ class SessionTracer:
315
328
  self._current_trace.session_id,
316
329
  timestep_db_id=timestep_db_id,
317
330
  message_type=message_type,
318
- content=content,
331
+ content=content_str,
319
332
  event_time=msg.time_record.event_time,
320
333
  message_time=msg.time_record.message_time,
321
334
  metadata=msg.metadata,
@@ -323,6 +336,22 @@ class SessionTracer:
323
336
  return message_id
324
337
  return None
325
338
 
339
+ @staticmethod
340
+ def _normalise_message_content(content: Any) -> tuple[SessionMessageContent, str]:
341
+ if isinstance(content, SessionMessageContent):
342
+ return content, content.as_text()
343
+ if isinstance(content, str):
344
+ payload = SessionMessageContent(text=content)
345
+ return payload, payload.as_text()
346
+ try:
347
+ serialized = json.dumps(content, ensure_ascii=False)
348
+ payload = SessionMessageContent(json_payload=serialized)
349
+ return payload, serialized
350
+ except (TypeError, ValueError):
351
+ text = str(content)
352
+ payload = SessionMessageContent(text=text)
353
+ return payload, text
354
+
326
355
  async def end_session(self, save: bool | None = None) -> SessionTrace:
327
356
  """End the current session.
328
357
 
@@ -339,18 +368,28 @@ class SessionTracer:
339
368
  # End any open timesteps
340
369
  for step in self._current_trace.session_time_steps:
341
370
  if step.completed_at is None:
342
- step.completed_at = datetime.utcnow()
371
+ step.completed_at = datetime.now(UTC)
343
372
 
344
373
  # Trigger pre-save hooks
345
374
  await self.hooks.trigger("before_save", session=self._current_trace)
346
375
 
347
376
  # Save if requested
348
377
  should_save = save if save is not None else self.auto_save
378
+
379
+ # Debug logging
380
+ import logging
381
+ _logger = logging.getLogger(__name__)
382
+ _logger.info(f"[TRACE_DEBUG] end_session: should_save={should_save}, self.db={self.db is not None}, auto_save={self.auto_save}")
383
+
349
384
  if should_save and self.db:
385
+ _logger.info(f"[TRACE_DEBUG] Calling insert_session_trace with {len(self._current_trace.markov_blanket_message_history)} messages")
350
386
  await self.db.insert_session_trace(self._current_trace)
387
+ _logger.info("[TRACE_DEBUG] insert_session_trace completed")
351
388
 
352
389
  # Trigger post-save hooks
353
390
  await self.hooks.trigger("after_save", session=self._current_trace)
391
+ else:
392
+ _logger.warning(f"[TRACE_DEBUG] Skipping save: should_save={should_save}, self.db={self.db is not None}")
354
393
 
355
394
  # Trigger session end hooks
356
395
  await self.hooks.trigger("session_end", session=self._current_trace)
@@ -435,7 +474,14 @@ class SessionTracer:
435
474
  # Reward recording helpers
436
475
  # -------------------------------
437
476
 
438
- async def record_outcome_reward(self, *, total_reward: int, achievements_count: int, total_steps: int, reward_metadata: dict[str, Any] | None = None) -> int | None:
477
+ async def record_outcome_reward(
478
+ self,
479
+ *,
480
+ total_reward: int,
481
+ achievements_count: int,
482
+ total_steps: int,
483
+ reward_metadata: dict[str, Any] | None = None,
484
+ ) -> int | None:
439
485
  """Record an episode-level outcome reward for the current session."""
440
486
  if self._current_trace is None:
441
487
  raise RuntimeError("No active session")
@@ -462,7 +508,18 @@ class SessionTracer:
462
508
 
463
509
  # StepMetrics removed in favor of event_rewards; use record_event_reward for per-turn shaped values
464
510
 
465
- async def record_event_reward(self, *, event_id: int, message_id: int | None = None, turn_number: int | None = None, reward_value: float = 0.0, reward_type: str | None = None, key: str | None = None, annotation: dict[str, Any] | None = None, source: str | None = None) -> int | None:
511
+ async def record_event_reward(
512
+ self,
513
+ *,
514
+ event_id: int,
515
+ message_id: int | None = None,
516
+ turn_number: int | None = None,
517
+ reward_value: float = 0.0,
518
+ reward_type: str | None = None,
519
+ key: str | None = None,
520
+ annotation: dict[str, Any] | None = None,
521
+ source: str | None = None,
522
+ ) -> int | None:
466
523
  """Record a first-class event-level reward with optional annotations."""
467
524
  if self._current_trace is None:
468
525
  raise RuntimeError("No active session")
@@ -54,7 +54,10 @@ class TraceStorage(ABC):
54
54
 
55
55
  @abstractmethod
56
56
  async def get_model_usage(
57
- self, start_date: datetime | None = None, end_date: datetime | None = None, model_name: str | None = None
57
+ self,
58
+ start_date: datetime | None = None,
59
+ end_date: datetime | None = None,
60
+ model_name: str | None = None,
58
61
  ) -> Any:
59
62
  """Get model usage statistics.
60
63
 
@@ -85,6 +88,91 @@ class TraceStorage(ABC):
85
88
  """Close the storage connection."""
86
89
  pass
87
90
 
91
+ # Incremental helpers -------------------------------------------------
92
+
93
+ @abstractmethod
94
+ async def ensure_session(
95
+ self,
96
+ session_id: str,
97
+ *,
98
+ created_at: datetime | None = None,
99
+ metadata: dict[str, Any] | None = None,
100
+ ) -> None:
101
+ """Ensure a session row exists for the given session id."""
102
+ pass
103
+
104
+ @abstractmethod
105
+ async def ensure_timestep(
106
+ self,
107
+ session_id: str,
108
+ *,
109
+ step_id: str,
110
+ step_index: int,
111
+ turn_number: int | None = None,
112
+ started_at: datetime | None = None,
113
+ completed_at: datetime | None = None,
114
+ metadata: dict[str, Any] | None = None,
115
+ ) -> int:
116
+ """Ensure a timestep row exists and return its database id."""
117
+ pass
118
+
119
+ @abstractmethod
120
+ async def insert_event_row(
121
+ self,
122
+ session_id: str,
123
+ *,
124
+ timestep_db_id: int | None,
125
+ event: Any,
126
+ metadata_override: dict[str, Any] | None = None,
127
+ ) -> int:
128
+ """Insert an event and return its database id."""
129
+ pass
130
+
131
+ @abstractmethod
132
+ async def insert_message_row(
133
+ self,
134
+ session_id: str,
135
+ *,
136
+ timestep_db_id: int | None,
137
+ message_type: str,
138
+ content: Any,
139
+ event_time: float | None = None,
140
+ message_time: int | None = None,
141
+ metadata: dict[str, Any] | None = None,
142
+ ) -> int:
143
+ """Insert a message row linked to a session/timestep."""
144
+ pass
145
+
146
+ @abstractmethod
147
+ async def insert_outcome_reward(
148
+ self,
149
+ session_id: str,
150
+ *,
151
+ total_reward: int,
152
+ achievements_count: int,
153
+ total_steps: int,
154
+ reward_metadata: dict | None = None,
155
+ ) -> int:
156
+ """Record an outcome reward for a session."""
157
+ pass
158
+
159
+ @abstractmethod
160
+ async def insert_event_reward(
161
+ self,
162
+ session_id: str,
163
+ *,
164
+ event_id: int,
165
+ message_id: int | None = None,
166
+ turn_number: int | None = None,
167
+ reward_value: float = 0.0,
168
+ reward_type: str | None = None,
169
+ key: str | None = None,
170
+ annotation: dict[str, Any] | None = None,
171
+ source: str | None = None,
172
+ ) -> int:
173
+ """Record a reward tied to a specific event."""
174
+ pass
175
+
88
176
  # Optional experiment management methods
89
177
  async def create_experiment(
90
178
  self,
@@ -1,29 +1,34 @@
1
1
  """Storage configuration for tracing v3."""
2
2
 
3
3
  import os
4
- from dataclasses import dataclass
4
+ from dataclasses import dataclass, field
5
5
  from enum import Enum
6
6
  from typing import Any
7
7
 
8
+ from ..config import resolve_trace_db_auth_token, resolve_trace_db_settings
9
+
8
10
 
9
11
  class StorageBackend(str, Enum):
10
12
  """Supported storage backends."""
11
13
 
12
- TURSO = "turso"
14
+ TURSO_NATIVE = "turso_native"
13
15
  SQLITE = "sqlite"
14
16
  POSTGRES = "postgres" # Future support
15
17
 
16
18
 
19
+ def _is_enabled(value: str | None) -> bool:
20
+ if value is None:
21
+ return False
22
+ return value.lower() in {"1", "true", "yes", "on"}
23
+
24
+
17
25
  @dataclass
18
26
  class StorageConfig:
19
27
  """Configuration for storage backend."""
20
28
 
21
- backend: StorageBackend = StorageBackend.TURSO
22
29
  connection_string: str | None = None
23
-
24
- # Turso-specific settings
25
- turso_url: str = os.getenv("TURSO_DATABASE_URL", "sqlite+libsql://http://127.0.0.1:8080")
26
- turso_auth_token: str = os.getenv("TURSO_AUTH_TOKEN", "")
30
+ backend: StorageBackend | None = None
31
+ turso_auth_token: str | None = field(default=None)
27
32
 
28
33
  # Common settings
29
34
  pool_size: int = int(os.getenv("STORAGE_POOL_SIZE", "8"))
@@ -34,23 +39,65 @@ class StorageConfig:
34
39
  enable_compression: bool = os.getenv("STORAGE_COMPRESSION", "false").lower() == "true"
35
40
  max_content_length: int = int(os.getenv("STORAGE_MAX_CONTENT_LENGTH", "1000000")) # 1MB
36
41
 
42
+ def __post_init__(self):
43
+ # Allow legacy override while keeping compatibility with existing TURSO_NATIVE env flag
44
+ native_env = os.getenv("TURSO_NATIVE")
45
+ native_flag = _is_enabled(native_env) if native_env is not None else None
46
+ resolved_url: str | None = self.connection_string
47
+ resolved_token: str | None = self.turso_auth_token
48
+
49
+ if resolved_url is None:
50
+ resolved_url, inferred_token = resolve_trace_db_settings()
51
+ self.connection_string = resolved_url
52
+ resolved_token = inferred_token
53
+
54
+ if resolved_token is None:
55
+ resolved_token = resolve_trace_db_auth_token()
56
+
57
+ self.turso_auth_token = resolved_token or ""
58
+
59
+ if self.backend is None:
60
+ self.backend = self._infer_backend(self.connection_string or "")
61
+
62
+ if native_flag is False:
63
+ raise RuntimeError("TURSO_NATIVE=false is no longer supported; only Turso/libSQL backend is available.")
64
+
65
+ # Allow both TURSO_NATIVE and SQLITE backends (both use libsql.connect)
66
+ if self.backend not in (StorageBackend.TURSO_NATIVE, StorageBackend.SQLITE):
67
+ raise RuntimeError(f"Unsupported backend: {self.backend}. Only Turso/libSQL and SQLite are supported.")
68
+
69
+ @staticmethod
70
+ def _infer_backend(connection_string: str) -> StorageBackend:
71
+ """Infer backend type from the connection string."""
72
+ scheme = connection_string.split(":", 1)[0].lower()
73
+
74
+ # Plain SQLite files: file://, /absolute/path, or no scheme
75
+ if (
76
+ scheme == "file"
77
+ or scheme.startswith("sqlite")
78
+ or connection_string.startswith("/")
79
+ or "://" not in connection_string
80
+ ):
81
+ return StorageBackend.SQLITE
82
+
83
+ # Turso/sqld: libsql://, http://, https://
84
+ if scheme.startswith("libsql") or "libsql" in scheme or scheme in ("http", "https"):
85
+ return StorageBackend.TURSO_NATIVE
86
+
87
+ raise RuntimeError(f"Unsupported tracing backend scheme: {scheme}")
88
+
37
89
  def get_connection_string(self) -> str:
38
90
  """Get the appropriate connection string for the backend."""
39
91
  if self.connection_string:
40
92
  return self.connection_string
41
93
 
42
- if self.backend == StorageBackend.TURSO:
43
- return self.turso_url
44
- elif self.backend == StorageBackend.SQLITE:
45
- return "sqlite+aiosqlite:///traces.db"
46
- elif self.backend == StorageBackend.POSTGRES:
47
- return os.getenv("POSTGRES_URL", "postgresql+asyncpg://localhost/traces")
48
- else:
49
- raise ValueError(f"Unknown backend: {self.backend}")
94
+ if self.backend == StorageBackend.TURSO_NATIVE:
95
+ return self.connection_string or ""
96
+ raise ValueError(f"Unsupported backend: {self.backend}")
50
97
 
51
98
  def get_backend_config(self) -> dict[str, Any]:
52
99
  """Get backend-specific configuration."""
53
- if self.backend == StorageBackend.TURSO:
100
+ if self.backend == StorageBackend.TURSO_NATIVE:
54
101
  config = {}
55
102
  if self.turso_auth_token:
56
103
  config["auth_token"] = self.turso_auth_token
@@ -1,7 +1,6 @@
1
1
  """Factory for creating storage instances."""
2
2
 
3
-
4
- from ..turso.manager import AsyncSQLTraceManager
3
+ from ..turso.native_manager import NativeLibsqlTraceManager
5
4
  from .base import TraceStorage
6
5
  from .config import StorageBackend, StorageConfig
7
6
 
@@ -23,13 +22,16 @@ def create_storage(config: StorageConfig | None = None) -> TraceStorage:
23
22
 
24
23
  config = STORAGE_CONFIG
25
24
 
26
- if config.backend == StorageBackend.TURSO:
27
- # Turso uses the AsyncSQLTraceManager
28
- return AsyncSQLTraceManager(db_url=config.get_connection_string())
29
- elif config.backend == StorageBackend.SQLITE:
30
- # For pure SQLite, we can still use AsyncSQLTraceManager
31
- # but with a file-based URL
32
- return AsyncSQLTraceManager(db_url=config.get_connection_string())
25
+ connection_string = config.get_connection_string()
26
+
27
+ # Both TURSO_NATIVE and SQLITE use NativeLibsqlTraceManager
28
+ # because libsql.connect() handles both remote and local file databases
29
+ if config.backend in (StorageBackend.TURSO_NATIVE, StorageBackend.SQLITE):
30
+ backend_config = config.get_backend_config()
31
+ return NativeLibsqlTraceManager(
32
+ db_url=connection_string,
33
+ auth_token=backend_config.get("auth_token"),
34
+ )
33
35
  elif config.backend == StorageBackend.POSTGRES:
34
36
  # Future: PostgreSQL implementation
35
37
  raise NotImplementedError("PostgreSQL backend not yet implemented")
@@ -3,8 +3,8 @@
3
3
  import asyncio
4
4
  import functools
5
5
  import time
6
- from collections.abc import Callable
7
- from typing import Any, TypeVar
6
+ from collections.abc import Awaitable, Callable
7
+ from typing import Any, TypeVar, cast
8
8
 
9
9
  T = TypeVar("T")
10
10
 
@@ -18,10 +18,10 @@ def retry_async(max_attempts: int = 3, delay: float = 1.0, backoff: float = 2.0)
18
18
  backoff: Backoff multiplier for each retry
19
19
  """
20
20
 
21
- def decorator(func: Callable[..., T]) -> Callable[..., T]:
21
+ def decorator(func: Callable[..., Awaitable[T]]) -> Callable[..., Awaitable[T]]:
22
22
  @functools.wraps(func)
23
- async def wrapper(*args, **kwargs):
24
- last_exception = None
23
+ async def wrapper(*args: Any, **kwargs: Any) -> T:
24
+ last_exception: Exception | None = None
25
25
  current_delay = delay
26
26
 
27
27
  for attempt in range(max_attempts):
@@ -35,7 +35,9 @@ def retry_async(max_attempts: int = 3, delay: float = 1.0, backoff: float = 2.0)
35
35
  else:
36
36
  raise
37
37
 
38
- raise last_exception
38
+ if last_exception:
39
+ raise last_exception
40
+ raise RuntimeError("Retry logic failed without exception")
39
41
 
40
42
  return wrapper
41
43
 
@@ -169,13 +171,14 @@ STORAGE_METRICS = StorageMetrics()
169
171
  def track_metrics(operation: str):
170
172
  """Decorator to track storage operation metrics."""
171
173
 
172
- def decorator(func: Callable[..., T]) -> Callable[..., T]:
174
+ def decorator(func: Callable[..., Awaitable[T]] | Callable[..., T]) -> Callable[..., Awaitable[T]] | Callable[..., T]:
173
175
  @functools.wraps(func)
174
- async def async_wrapper(*args, **kwargs):
176
+ async def async_wrapper(*args: Any, **kwargs: Any) -> T:
175
177
  start_time = time.time()
176
178
  success = False
177
179
  try:
178
- result = await func(*args, **kwargs)
180
+ async_func = cast(Callable[..., Awaitable[T]], func)
181
+ result = await async_func(*args, **kwargs)
179
182
  success = True
180
183
  return result
181
184
  finally:
@@ -183,11 +186,12 @@ def track_metrics(operation: str):
183
186
  STORAGE_METRICS.record_operation(operation, duration, success)
184
187
 
185
188
  @functools.wraps(func)
186
- def sync_wrapper(*args, **kwargs):
189
+ def sync_wrapper(*args: Any, **kwargs: Any) -> T:
187
190
  start_time = time.time()
188
191
  success = False
189
192
  try:
190
- result = func(*args, **kwargs)
193
+ sync_func = cast(Callable[..., T], func)
194
+ result = sync_func(*args, **kwargs)
191
195
  success = True
192
196
  return result
193
197
  finally: