synth-ai 0.2.9.dev0__py3-none-any.whl → 0.2.23.dev3__py3-none-any.whl

This diff shows the content changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
Files changed (890)
  1. examples/README.md +1 -0
  2. examples/__init__.py +16 -0
  3. examples/analyze_semantic_words.sh +17 -0
  4. examples/baseline/banking77_baseline.py +243 -0
  5. examples/baseline/banking77_pipeline_baseline.py +294 -0
  6. examples/baseline/crafter_baseline.py +407 -0
  7. examples/baseline/pokemon_red_baseline.py +326 -0
  8. examples/baseline/simple_baseline.py +56 -0
  9. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  10. examples/blog_posts/gepa/README.md +355 -0
  11. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  12. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +80 -0
  13. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +50 -0
  14. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_local.toml +101 -0
  15. examples/blog_posts/gepa/configs/banking77_pipeline_gepa_test.toml +96 -0
  16. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +57 -0
  17. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +35 -0
  18. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +51 -0
  19. examples/blog_posts/gepa/configs/hover_gepa_local.toml +57 -0
  20. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +35 -0
  21. examples/blog_posts/gepa/configs/hover_mipro_local.toml +51 -0
  22. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +57 -0
  23. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +35 -0
  24. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +51 -0
  25. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +58 -0
  26. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +52 -0
  27. examples/blog_posts/gepa/deploy_banking77_task_app.sh +54 -0
  28. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  29. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  30. examples/blog_posts/gepa/run_gepa_banking77.sh +112 -0
  31. examples/blog_posts/gepa/run_gepa_banking77_pipeline.sh +163 -0
  32. examples/blog_posts/gepa/task_apps.py +105 -0
  33. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  34. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  35. examples/blog_posts/mipro/README.md +415 -0
  36. examples/blog_posts/mipro/configs/banking77_mipro_local.toml +91 -0
  37. examples/blog_posts/mipro/configs/banking77_mipro_test.toml +87 -0
  38. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gemini_flash_lite_local.toml +98 -0
  39. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_gpt41mini_local.toml +96 -0
  40. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_local.toml +94 -0
  41. examples/blog_posts/mipro/configs/banking77_pipeline_mipro_test.toml +170 -0
  42. examples/blog_posts/mipro/deploy_banking77_pipeline_task_app.sh +59 -0
  43. examples/blog_posts/mipro/deploy_banking77_task_app.sh +41 -0
  44. examples/blog_posts/mipro/multi_step.md +79 -0
  45. examples/blog_posts/mipro/run_mipro_banking77.sh +191 -0
  46. examples/blog_posts/mipro/run_mipro_banking77_pipeline.sh +171 -0
  47. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gemini_flash_lite.sh +177 -0
  48. examples/blog_posts/mipro/run_mipro_banking77_pipeline_gpt41mini.sh +173 -0
  49. examples/blog_posts/mipro/verify_banking77_setup.sh +117 -0
  50. examples/blog_posts/pokemon_vl/README.md +98 -0
  51. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  52. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  53. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  54. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  55. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  56. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  57. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  58. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  59. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  60. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  61. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  62. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  63. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  64. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  65. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  66. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  67. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  68. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  69. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  70. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  71. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  72. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  73. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  74. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  75. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  76. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  77. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  78. examples/crafter_debug_render.py +186 -0
  79. examples/dev/qwen3_32b_qlora_4xh100.toml +45 -0
  80. examples/gepa/banking77_pipeline_gepa.toml +96 -0
  81. examples/gepa/multi_stage_gepa_example.toml +84 -0
  82. examples/gepa/run_gepa_banking77_pipeline.sh +157 -0
  83. examples/multi_step/SFT_README.md +147 -0
  84. examples/multi_step/configs/README_verilog_rl.md +77 -0
  85. examples/multi_step/configs/VERILOG_REWARDS.md +103 -0
  86. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +196 -0
  87. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  88. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  89. examples/multi_step/configs/crafter_rl_outcome.toml +75 -0
  90. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +145 -0
  91. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +84 -0
  92. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +79 -0
  93. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  94. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  95. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  96. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  97. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  98. examples/multi_step/configs/verilog_rl_lora.toml +147 -0
  99. examples/multi_step/convert_traces_to_sft.py +84 -0
  100. examples/multi_step/crafter_rl_lora.md +70 -0
  101. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  102. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  103. examples/multi_step/readme.md +48 -0
  104. examples/multi_step/run_sft_qwen30b.sh +45 -0
  105. examples/multi_step/sse_metrics_streaming_notes.md +357 -0
  106. examples/multi_step/task_app_config_notes.md +494 -0
  107. examples/multi_step/verilog_rl_lora.md +218 -0
  108. examples/qwen_coder/README.md +102 -0
  109. examples/qwen_coder/_shared.py +113 -0
  110. examples/qwen_coder/configs/coder_lora_30b.toml +60 -0
  111. examples/qwen_coder/configs/coder_lora_4b.toml +61 -0
  112. examples/qwen_coder/configs/coder_lora_small.toml +57 -0
  113. examples/qwen_coder/generate_dataset.py +98 -0
  114. examples/qwen_coder/infer_ft_smoke.py +65 -0
  115. examples/qwen_coder/infer_prod_proxy.py +73 -0
  116. examples/qwen_coder/infer_via_synth.py +87 -0
  117. examples/qwen_coder/scripts/infer_coder.sh +19 -0
  118. examples/qwen_coder/scripts/train_coder_30b.sh +22 -0
  119. examples/qwen_coder/sft_full_17b.py +103 -0
  120. examples/qwen_coder/sft_lora_30b.py +110 -0
  121. examples/qwen_coder/subset_jsonl.py +39 -0
  122. examples/qwen_coder/todos.md +38 -0
  123. examples/qwen_coder/validate_jsonl.py +60 -0
  124. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  125. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  126. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  127. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  128. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  129. examples/qwen_vl/QUICKSTART.md +327 -0
  130. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  131. examples/qwen_vl/README.md +152 -0
  132. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  133. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  134. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  135. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  136. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  137. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  138. examples/qwen_vl/__init__.py +2 -0
  139. examples/qwen_vl/collect_data_via_cli.md +415 -0
  140. examples/qwen_vl/collect_vision_traces.py +368 -0
  141. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  142. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  143. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  144. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  145. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  146. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  147. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  148. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  149. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  150. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  151. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  152. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  153. examples/qwen_vl/run_vision_comparison.sh +61 -0
  154. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  155. examples/qwen_vl/test_image_validation.py +201 -0
  156. examples/qwen_vl/test_sft_vision_data.py +110 -0
  157. examples/rl/README.md +169 -0
  158. examples/rl/configs/eval_base_qwen.toml +17 -0
  159. examples/rl/configs/eval_rl_qwen.toml +13 -0
  160. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  161. examples/rl/configs/rl_from_base_qwen17.toml +80 -0
  162. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  163. examples/rl/download_dataset.py +80 -0
  164. examples/rl/run_eval.py +436 -0
  165. examples/rl/run_rl_and_save.py +111 -0
  166. examples/rl/task_app/README.md +21 -0
  167. {synth_ai/task/apps → examples/rl/task_app}/math_single_step.py +188 -50
  168. examples/rl/task_app/math_task_app.py +111 -0
  169. examples/run_crafter_demo.sh +10 -0
  170. examples/sdk_prompt_learning_example.py +55 -0
  171. examples/sft/README.md +139 -0
  172. examples/sft/configs/crafter_fft_qwen0p6b.toml +49 -0
  173. examples/sft/configs/crafter_lora_qwen0p6b.toml +49 -0
  174. examples/sft/evaluate.py +117 -0
  175. examples/sft/export_dataset.py +120 -0
  176. examples/sft/generate_traces.py +164 -0
  177. examples/swe/__init__.py +12 -0
  178. examples/swe/task_app/README.md +135 -0
  179. examples/swe/task_app/__init__.py +2 -0
  180. examples/swe/task_app/grpo_swe_mini.py +604 -0
  181. examples/swe/task_app/grpo_swe_mini_task_app.py +124 -0
  182. examples/swe/task_app/hosted/README.md +173 -0
  183. examples/swe/task_app/hosted/__init__.py +5 -0
  184. examples/swe/task_app/hosted/branching.py +143 -0
  185. examples/swe/task_app/hosted/environment_routes.py +1289 -0
  186. examples/swe/task_app/hosted/envs/__init__.py +1 -0
  187. examples/swe/task_app/hosted/envs/crafter/__init__.py +6 -0
  188. examples/swe/task_app/hosted/envs/crafter/app.py +1 -0
  189. examples/swe/task_app/hosted/envs/crafter/environment.py +522 -0
  190. examples/swe/task_app/hosted/envs/crafter/policy.py +478 -0
  191. examples/swe/task_app/hosted/envs/crafter/react_agent.py +108 -0
  192. examples/swe/task_app/hosted/envs/crafter/shared.py +305 -0
  193. examples/swe/task_app/hosted/envs/crafter/tools.py +47 -0
  194. examples/swe/task_app/hosted/envs/mini_swe/__init__.py +8 -0
  195. examples/swe/task_app/hosted/envs/mini_swe/environment.py +1191 -0
  196. examples/swe/task_app/hosted/envs/mini_swe/policy.py +355 -0
  197. examples/swe/task_app/hosted/envs/mini_swe/shared.py +83 -0
  198. examples/swe/task_app/hosted/envs/mini_swe/tools.py +96 -0
  199. examples/swe/task_app/hosted/hosted_app.py +204 -0
  200. examples/swe/task_app/hosted/inference/__init__.py +5 -0
  201. examples/swe/task_app/hosted/inference/openai_client.py +584 -0
  202. examples/swe/task_app/hosted/main.py +100 -0
  203. examples/swe/task_app/hosted/policy_routes.py +1094 -0
  204. examples/swe/task_app/hosted/registry.py +195 -0
  205. examples/swe/task_app/hosted/rollout.py +1905 -0
  206. examples/swe/task_app/hosted/storage/__init__.py +5 -0
  207. examples/swe/task_app/hosted/storage/volume.py +211 -0
  208. examples/swe/task_app/hosted/test_agents.py +161 -0
  209. examples/swe/task_app/hosted/test_service.py +136 -0
  210. examples/swe/task_app/hosted/utils.py +62 -0
  211. examples/swe/task_app/morph_backend.py +178 -0
  212. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  213. examples/task_apps/TESTING.md +275 -0
  214. examples/task_apps/banking77/__init__.py +6 -0
  215. examples/task_apps/banking77/banking77_task_app.py +912 -0
  216. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  217. examples/task_apps/banking77_pipeline/__init__.py +6 -0
  218. examples/task_apps/banking77_pipeline/banking77_pipeline_task_app.py +489 -0
  219. examples/task_apps/banking77_pipeline/deploy_wrapper.py +50 -0
  220. examples/task_apps/crafter/CREATE_SFT_DATASET.md +286 -0
  221. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  222. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +187 -0
  223. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +281 -0
  224. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  225. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  226. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  227. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  228. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  229. examples/task_apps/crafter/task_app/README.md +42 -0
  230. examples/task_apps/crafter/task_app/__init__.py +5 -0
  231. examples/task_apps/crafter/task_app/grpo_crafter.py +1055 -0
  232. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +146 -0
  233. examples/task_apps/crafter/task_app/synth_envs_hosted/README.md +173 -0
  234. examples/task_apps/crafter/task_app/synth_envs_hosted/__init__.py +5 -0
  235. examples/task_apps/crafter/task_app/synth_envs_hosted/branching.py +143 -0
  236. examples/task_apps/crafter/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  237. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  238. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  239. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  240. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +532 -0
  241. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +583 -0
  242. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +122 -0
  243. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  244. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  245. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +253 -0
  246. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  247. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +999 -0
  248. examples/task_apps/crafter/task_app/synth_envs_hosted/main.py +100 -0
  249. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +1252 -0
  250. examples/task_apps/crafter/task_app/synth_envs_hosted/registry.py +195 -0
  251. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +2233 -0
  252. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  253. examples/task_apps/crafter/task_app/synth_envs_hosted/storage/volume.py +211 -0
  254. examples/task_apps/crafter/task_app/synth_envs_hosted/test_agents.py +161 -0
  255. examples/task_apps/crafter/task_app/synth_envs_hosted/test_service.py +136 -0
  256. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +411 -0
  257. examples/task_apps/dev/pokemon_emerald/__init__.py +2 -0
  258. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/README.md +811 -0
  259. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/__init__.py +120 -0
  260. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/action.py +160 -0
  261. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/memory.py +155 -0
  262. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/perception.py +69 -0
  263. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/planning.py +96 -0
  264. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/simple.py +1502 -0
  265. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/agent/system_prompt.py +4 -0
  266. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/grab_map.py +68 -0
  267. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/manual.py +216 -0
  268. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/__init__.py +35 -0
  269. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emerald_utils.py +631 -0
  270. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/emulator.py +1544 -0
  271. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/enums.py +1428 -0
  272. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/memory_reader.py +4848 -0
  273. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/types.py +41 -0
  274. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pokemon_env/utils.py +298 -0
  275. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/pyproject.toml +95 -0
  276. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/run.py +204 -0
  277. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/app.py +2152 -0
  278. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/client.py +429 -0
  279. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server/frame_server.py +155 -0
  280. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/README.md +78 -0
  281. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/run_tests.py +122 -0
  282. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_direct.py +76 -0
  283. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_agent_prompts.py +413 -0
  284. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_battle_state_formatting.py +204 -0
  285. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection.py +133 -0
  286. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_dialogue_detection_comprehensive.py +229 -0
  287. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_direct_agent_emulator.py +300 -0
  288. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_fps_adjustment_pytest.py +205 -0
  289. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_direct.py +200 -0
  290. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_house_to_outside_transition.py +284 -0
  291. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_map_ground_truth_comparison.py +468 -0
  292. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_memory_map.py +575 -0
  293. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_server_map_validation.py +311 -0
  294. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests/test_torchic_state.py +259 -0
  295. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/anticheat.py +372 -0
  296. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/checkpoint.py +296 -0
  297. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/error_handler.py +275 -0
  298. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/get_local_ip.py +22 -0
  299. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/helpers.py +44 -0
  300. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/llm_logger.py +514 -0
  301. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_formatter.py +415 -0
  302. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher.py +1763 -0
  303. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_stitcher_singleton.py +33 -0
  304. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_trimmer.py +106 -0
  305. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/map_visualizer.py +334 -0
  306. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/ocr_dialogue.py +1020 -0
  307. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/recording.py +188 -0
  308. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/state_formatter.py +1481 -0
  309. examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils/vlm.py +862 -0
  310. examples/task_apps/dev/pokemon_emerald/modal_app.py +114 -0
  311. examples/task_apps/dev/pokemon_emerald/task_app/README.md +81 -0
  312. examples/task_apps/dev/pokemon_emerald/task_app/__init__.py +6 -0
  313. examples/task_apps/dev/pokemon_emerald/task_app/pokemon_emerald.py +685 -0
  314. examples/task_apps/enron/__init__.py +2 -0
  315. examples/task_apps/enron/eval_groq_qwen32.toml +16 -0
  316. examples/task_apps/enron/filter_sft.toml +5 -0
  317. examples/task_apps/enron/task_app/README.md +14 -0
  318. examples/task_apps/enron/task_app/__init__.py +1 -0
  319. examples/task_apps/enron/task_app/grpo_enron.py +906 -0
  320. examples/task_apps/enron/task_app/grpo_enron_task_app.py +146 -0
  321. examples/task_apps/enron/tests/__init__.py +4 -0
  322. examples/task_apps/enron/tests/conftest.py +115 -0
  323. examples/task_apps/enron/tests/integration/__init__.py +4 -0
  324. examples/task_apps/enron/tests/integration/test_enron_eval.py +179 -0
  325. examples/task_apps/enron/tests/integration/test_enron_rollout.py +135 -0
  326. examples/task_apps/enron/tests/unit/__init__.py +4 -0
  327. examples/task_apps/enron/tests/unit/test_enron_environment.py +126 -0
  328. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  329. examples/task_apps/gepa_benchmarks/common.py +260 -0
  330. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  331. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  332. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  333. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  334. examples/task_apps/math/README.md +21 -0
  335. examples/task_apps/math/math_single_step.py +1000 -0
  336. examples/task_apps/math/math_task_app.py +115 -0
  337. examples/task_apps/pokemon_battle/__init__.py +2 -0
  338. examples/task_apps/pokemon_battle/modal_app.py +104 -0
  339. examples/task_apps/pokemon_battle/task_app/README.md +68 -0
  340. examples/task_apps/pokemon_battle/task_app/__init__.py +6 -0
  341. examples/task_apps/pokemon_battle/task_app/pokemon_showdown.py +932 -0
  342. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  343. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  344. examples/task_apps/pokemon_red/README.md +356 -0
  345. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +428 -0
  346. examples/task_apps/pokemon_red/__init__.py +3 -0
  347. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +30 -0
  348. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +224 -0
  349. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +75 -0
  350. examples/task_apps/pokemon_red/task_app.py +1048 -0
  351. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +193 -0
  352. examples/task_apps/sokoban/README.md +306 -0
  353. examples/task_apps/sokoban/__init__.py +3 -0
  354. examples/task_apps/sokoban/eval_groq_qwen32.toml +16 -0
  355. examples/task_apps/sokoban/eval_openai_gpt5.toml +16 -0
  356. examples/task_apps/sokoban/filter_sft.toml +5 -0
  357. examples/task_apps/sokoban/task_app.py +1058 -0
  358. examples/task_apps/sokoban/tests/__init__.py +4 -0
  359. examples/task_apps/sokoban/tests/conftest.py +113 -0
  360. examples/task_apps/sokoban/tests/integration/__init__.py +4 -0
  361. examples/task_apps/sokoban/tests/integration/test_sokoban_eval.py +57 -0
  362. examples/task_apps/sokoban/tests/integration/test_sokoban_rollout.py +198 -0
  363. examples/task_apps/sokoban/tests/unit/__init__.py +4 -0
  364. examples/task_apps/sokoban/tests/unit/test_sokoban_environment.py +114 -0
  365. examples/task_apps/verilog/__init__.py +1 -0
  366. examples/task_apps/verilog/eval_groq_qwen32b.toml +22 -0
  367. examples/task_apps/verilog/filter_sft.toml +5 -0
  368. examples/task_apps/verilog/task_app/README.md +12 -0
  369. examples/task_apps/verilog/task_app/__init__.py +1 -0
  370. examples/task_apps/verilog/task_app/grpo_verilog.py +1166 -0
  371. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +145 -0
  372. examples/task_apps/verilog/tests/__init__.py +4 -0
  373. examples/task_apps/verilog/tests/conftest.py +115 -0
  374. examples/task_apps/verilog/tests/integration/__init__.py +4 -0
  375. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +181 -0
  376. examples/task_apps/verilog/tests/integration/test_verilog_rollout.py +55 -0
  377. examples/task_apps/verilog/tests/unit/__init__.py +4 -0
  378. examples/task_apps/verilog/tests/unit/test_verilog_scoring.py +118 -0
  379. examples/tunnel_gepa_banking77/README.md +106 -0
  380. examples/tunnel_gepa_banking77/banking77_gepa_tunnel.toml +95 -0
  381. examples/tunnel_gepa_banking77/keep_tunnel_running.py +60 -0
  382. examples/tunnel_gepa_banking77/run_gepa_with_tunnel.sh +226 -0
  383. examples/vlm/PROPOSAL.md +53 -0
  384. examples/vlm/README.md +68 -0
  385. examples/vlm/configs/crafter_vlm_gpt4o.toml +49 -0
  386. examples/vlm/crafter_image_only_agent.py +207 -0
  387. examples/vlm/crafter_openai_vlm_agent.py +275 -0
  388. examples/vlm/filter_image_rows.py +63 -0
  389. examples/vlm/run_crafter_vlm_benchmark.py +316 -0
  390. examples/warming_up_to_rl/_utils.py +92 -0
  391. examples/warming_up_to_rl/analyze_trace_db.py +422 -0
  392. examples/warming_up_to_rl/configs/crafter_fft.toml +53 -0
  393. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +54 -0
  394. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +22 -0
  395. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +15 -0
  396. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +24 -0
  397. examples/warming_up_to_rl/configs/eval_stepwise_complex.toml +35 -0
  398. examples/warming_up_to_rl/configs/eval_stepwise_consistent.toml +26 -0
  399. examples/warming_up_to_rl/configs/eval_stepwise_per_achievement.toml +36 -0
  400. examples/warming_up_to_rl/configs/eval_stepwise_simple.toml +32 -0
  401. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +85 -0
  402. examples/warming_up_to_rl/configs/rl_from_ft.toml +58 -0
  403. examples/warming_up_to_rl/export_trace_sft.py +837 -0
  404. examples/warming_up_to_rl/groq_test.py +97 -0
  405. examples/warming_up_to_rl/manage_secrets.py +131 -0
  406. examples/warming_up_to_rl/old/event_rewards.md +234 -0
  407. examples/warming_up_to_rl/old/notes.md +73 -0
  408. examples/warming_up_to_rl/readme.md +110 -0
  409. examples/warming_up_to_rl/run_eval.py +736 -0
  410. examples/warming_up_to_rl/run_fft_and_save.py +380 -0
  411. examples/warming_up_to_rl/run_local_rollout.py +239 -0
  412. examples/warming_up_to_rl/run_local_rollout_modal.py +248 -0
  413. examples/warming_up_to_rl/run_local_rollout_parallel.py +405 -0
  414. examples/warming_up_to_rl/run_local_rollout_traced.py +477 -0
  415. examples/warming_up_to_rl/run_rl_and_save.py +124 -0
  416. examples/warming_up_to_rl/run_rollout_remote.py +156 -0
  417. examples/warming_up_to_rl/task_app/README.md +42 -0
  418. examples/warming_up_to_rl/task_app/grpo_crafter.py +876 -0
  419. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  420. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  421. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  422. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  423. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  424. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  425. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  426. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  427. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  428. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  429. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  430. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  431. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  432. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +253 -0
  433. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  434. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +729 -0
  435. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  436. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1114 -0
  437. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  438. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1891 -0
  439. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  440. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  441. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  442. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  443. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +129 -0
  444. examples/workflows/math_rl/configs/eval_base_qwen.toml +15 -0
  445. examples/workflows/math_rl/configs/eval_rl_qwen.toml +11 -0
  446. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +62 -0
  447. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +80 -0
  448. examples/workflows/math_rl/configs/rl_from_ft_qwen.toml +35 -0
  449. examples/workflows/math_rl/download_dataset.py +80 -0
  450. examples/workflows/math_rl/run_eval.py +436 -0
  451. examples/workflows/math_rl/run_rl_and_save.py +111 -0
  452. synth_ai/__init__.py +47 -23
  453. synth_ai/_utils/__init__.py +47 -0
  454. synth_ai/_utils/base_url.py +10 -0
  455. synth_ai/_utils/http.py +10 -0
  456. synth_ai/_utils/prompts.py +10 -0
  457. synth_ai/_utils/task_app_state.py +12 -0
  458. synth_ai/_utils/user_config.py +10 -0
  459. synth_ai/api/models/supported.py +514 -0
  460. synth_ai/api/train/__init__.py +60 -2
  461. synth_ai/api/train/builders.py +347 -39
  462. synth_ai/api/train/cli.py +895 -160
  463. synth_ai/api/train/config_finder.py +103 -25
  464. synth_ai/api/train/configs/__init__.py +65 -0
  465. synth_ai/api/train/configs/prompt_learning.py +496 -0
  466. synth_ai/api/train/configs/rl.py +188 -0
  467. synth_ai/api/train/configs/sft.py +99 -0
  468. synth_ai/api/train/configs/shared.py +81 -0
  469. synth_ai/api/train/env_resolver.py +70 -20
  470. synth_ai/api/train/pollers.py +29 -4
  471. synth_ai/api/train/prompt_learning.py +425 -0
  472. synth_ai/api/train/sft.py +390 -0
  473. synth_ai/api/train/supported_algos.py +147 -0
  474. synth_ai/api/train/task_app.py +6 -4
  475. synth_ai/api/train/utils.py +64 -52
  476. synth_ai/api/train/validators.py +1117 -0
  477. synth_ai/api/tunnel.py +49 -0
  478. synth_ai/auth/credentials.py +94 -0
  479. synth_ai/baseline/__init__.py +25 -0
  480. synth_ai/baseline/config.py +209 -0
  481. synth_ai/baseline/discovery.py +214 -0
  482. synth_ai/baseline/execution.py +146 -0
  483. synth_ai/cfgs.py +227 -0
  484. synth_ai/cli/__init__.py +85 -63
  485. synth_ai/cli/_modal_wrapper.py +31 -0
  486. synth_ai/cli/_storage.py +20 -0
  487. synth_ai/cli/_typer_patch.py +47 -0
  488. synth_ai/cli/_validate_task_app.py +29 -0
  489. synth_ai/cli/balance.py +16 -4
  490. synth_ai/cli/calc.py +36 -21
  491. synth_ai/cli/claude.py +70 -0
  492. synth_ai/cli/codex.py +267 -0
  493. synth_ai/cli/commands/__init__.py +18 -0
  494. synth_ai/cli/commands/baseline/__init__.py +12 -0
  495. synth_ai/cli/commands/baseline/core.py +637 -0
  496. synth_ai/cli/commands/baseline/list.py +93 -0
  497. synth_ai/cli/commands/demo/__init__.py +6 -0
  498. synth_ai/cli/commands/demo/core.py +163 -0
  499. synth_ai/cli/commands/eval/__init__.py +19 -0
  500. synth_ai/cli/commands/eval/core.py +1112 -0
  501. synth_ai/cli/commands/eval/errors.py +81 -0
  502. synth_ai/cli/commands/eval/validation.py +133 -0
  503. synth_ai/cli/commands/filter/__init__.py +12 -0
  504. synth_ai/cli/commands/filter/core.py +424 -0
  505. synth_ai/cli/commands/filter/errors.py +55 -0
  506. synth_ai/cli/commands/filter/validation.py +77 -0
  507. synth_ai/cli/commands/help/__init__.py +185 -0
  508. synth_ai/cli/commands/help/core.py +72 -0
  509. synth_ai/cli/commands/smoke/__init__.py +7 -0
  510. synth_ai/cli/commands/smoke/core.py +1437 -0
  511. synth_ai/cli/commands/status/__init__.py +66 -0
  512. synth_ai/cli/commands/status/client.py +192 -0
  513. synth_ai/cli/commands/status/config.py +92 -0
  514. synth_ai/cli/commands/status/errors.py +20 -0
  515. synth_ai/cli/commands/status/formatters.py +164 -0
  516. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  517. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  518. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  519. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  520. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  521. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  522. synth_ai/cli/commands/status/subcommands/session.py +183 -0
  523. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  524. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  525. synth_ai/cli/commands/status/utils.py +114 -0
  526. synth_ai/cli/commands/train/__init__.py +53 -0
  527. synth_ai/cli/commands/train/core.py +21 -0
  528. synth_ai/cli/commands/train/errors.py +117 -0
  529. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  530. synth_ai/cli/commands/train/judge_validation.py +305 -0
  531. synth_ai/cli/commands/train/validation.py +386 -0
  532. synth_ai/cli/demo.py +32 -140
  533. synth_ai/cli/deploy.py +233 -0
  534. synth_ai/cli/eval/__init__.py +36 -0
  535. synth_ai/cli/eval/core.py +5 -0
  536. synth_ai/cli/eval/errors.py +31 -0
  537. synth_ai/cli/eval/validation.py +5 -0
  538. synth_ai/cli/filter/__init__.py +28 -0
  539. synth_ai/cli/filter/core.py +5 -0
  540. synth_ai/cli/filter/errors.py +23 -0
  541. synth_ai/cli/filter/validation.py +5 -0
  542. synth_ai/cli/legacy_root_backup.py +28 -22
  543. synth_ai/cli/lib/__init__.py +10 -0
  544. synth_ai/cli/lib/task_app_discovery.py +7 -0
  545. synth_ai/cli/lib/task_app_env.py +518 -0
  546. synth_ai/cli/mcp.py +34 -0
  547. synth_ai/cli/modal_serve/__init__.py +12 -0
  548. synth_ai/cli/modal_serve/core.py +14 -0
  549. synth_ai/cli/modal_serve/errors.py +8 -0
  550. synth_ai/cli/modal_serve/validation.py +11 -0
  551. synth_ai/cli/opencode.py +256 -0
  552. synth_ai/cli/recent.py +13 -7
  553. synth_ai/cli/rl_demo.py +156 -116
  554. synth_ai/cli/root.py +131 -132
  555. synth_ai/cli/serve/__init__.py +12 -0
  556. synth_ai/cli/serve/core.py +14 -0
  557. synth_ai/cli/serve/errors.py +8 -0
  558. synth_ai/cli/serve/validation.py +11 -0
  559. synth_ai/cli/setup.py +49 -0
  560. synth_ai/cli/status.py +7 -125
  561. synth_ai/cli/task_app_deploy.py +7 -0
  562. synth_ai/cli/task_app_list.py +25 -0
  563. synth_ai/cli/task_app_modal_serve.py +11 -0
  564. synth_ai/cli/task_app_serve.py +11 -0
  565. synth_ai/cli/task_apps.py +2284 -257
  566. synth_ai/cli/traces.py +9 -5
  567. synth_ai/cli/train/__init__.py +12 -0
  568. synth_ai/cli/train/core.py +21 -0
  569. synth_ai/cli/train/errors.py +8 -0
  570. synth_ai/cli/train/validation.py +24 -0
  571. synth_ai/cli/train.py +5 -0
  572. synth_ai/cli/turso.py +73 -0
  573. synth_ai/cli/watch.py +13 -18
  574. synth_ai/demos/__init__.py +10 -0
  575. synth_ai/demos/core/__init__.py +28 -1
  576. synth_ai/demos/core/cli.py +579 -291
  577. synth_ai/demos/crafter/__init__.py +1 -0
  578. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  579. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  580. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  581. synth_ai/demos/demo_registry.py +176 -0
  582. synth_ai/demos/demo_task_apps/__init__.py +3 -3
  583. synth_ai/demos/demo_task_apps/core.py +64 -28
  584. synth_ai/demos/demo_task_apps/crafter/__init__.py +1 -0
  585. synth_ai/demos/demo_task_apps/crafter/configs/crafter_fft_4b.toml +53 -0
  586. synth_ai/demos/demo_task_apps/crafter/configs/rl_from_base_qwen4b.toml +73 -0
  587. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +184 -0
  588. synth_ai/demos/demo_task_apps/math/_common.py +1 -2
  589. synth_ai/demos/demo_task_apps/math/app.py +2 -1
  590. synth_ai/demos/demo_task_apps/math/deploy_modal.py +3 -6
  591. synth_ai/demos/demo_task_apps/math/modal_task_app.py +185 -83
  592. synth_ai/demos/demo_task_apps/math/task_app_entry.py +0 -2
  593. synth_ai/demos/math/__init__.py +1 -0
  594. synth_ai/demos/math/_common.py +16 -0
  595. synth_ai/demos/math/app.py +38 -0
  596. synth_ai/demos/math/config.toml +76 -0
  597. synth_ai/demos/math/deploy_modal.py +54 -0
  598. synth_ai/demos/math/modal_task_app.py +703 -0
  599. synth_ai/demos/math/task_app_entry.py +51 -0
  600. synth_ai/environments/environment/core.py +7 -1
  601. synth_ai/environments/examples/bandit/engine.py +12 -5
  602. synth_ai/environments/examples/bandit/environment.py +0 -1
  603. synth_ai/environments/examples/bandit/taskset.py +4 -4
  604. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  605. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  606. synth_ai/environments/examples/crafter_classic/environment.py +93 -2
  607. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  608. synth_ai/environments/examples/enron/engine.py +7 -2
  609. synth_ai/environments/examples/enron/environment.py +68 -0
  610. synth_ai/environments/examples/red/engine.py +60 -12
  611. synth_ai/environments/examples/red/engine_helpers/memory_map.py +7 -0
  612. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  613. synth_ai/environments/examples/red/engine_helpers/reward_library/pallet_town_progression.py +477 -0
  614. synth_ai/environments/examples/red/engine_helpers/state_extraction.py +32 -0
  615. synth_ai/environments/examples/red/environment.py +86 -0
  616. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  617. synth_ai/environments/examples/sokoban/taskset.py +116 -0
  618. synth_ai/environments/examples/verilog/engine.py +104 -12
  619. synth_ai/environments/examples/wordle/environment.py +0 -1
  620. synth_ai/environments/reproducibility/tree.py +5 -6
  621. synth_ai/environments/service/app.py +11 -12
  622. synth_ai/environments/service/core_routes.py +10 -9
  623. synth_ai/environments/stateful/engine.py +1 -1
  624. synth_ai/environments/tasks/core.py +1 -0
  625. synth_ai/environments/tasks/filters.py +5 -6
  626. synth_ai/environments/tasks/utils.py +4 -5
  627. synth_ai/evals/__init__.py +15 -0
  628. synth_ai/evals/base.py +14 -5
  629. synth_ai/evals/client.py +82 -0
  630. synth_ai/evals/types.py +42 -0
  631. synth_ai/http.py +8 -22
  632. synth_ai/http_client.py +45 -12
  633. synth_ai/inference/__init__.py +0 -2
  634. synth_ai/inference/client.py +21 -7
  635. synth_ai/jobs/client.py +129 -80
  636. synth_ai/judge_schemas.py +127 -0
  637. synth_ai/learning/__init__.py +51 -6
  638. synth_ai/learning/algorithms.py +14 -0
  639. synth_ai/learning/client.py +122 -30
  640. synth_ai/learning/config.py +2 -40
  641. synth_ai/learning/constants.py +0 -2
  642. synth_ai/learning/ft_client.py +4 -56
  643. synth_ai/learning/health.py +14 -8
  644. synth_ai/learning/jobs.py +43 -47
  645. synth_ai/learning/prompt_learning_client.py +276 -0
  646. synth_ai/learning/prompt_learning_types.py +185 -0
  647. synth_ai/{rl → learning/rl}/__init__.py +14 -5
  648. synth_ai/learning/rl/client.py +269 -0
  649. synth_ai/learning/rl/config.py +31 -0
  650. synth_ai/{rl → learning/rl}/contracts.py +5 -10
  651. synth_ai/{rl → learning/rl}/env_keys.py +45 -16
  652. synth_ai/learning/rl/secrets.py +13 -0
  653. synth_ai/learning/rl_client.py +2 -253
  654. synth_ai/learning/sft/__init__.py +29 -0
  655. synth_ai/learning/sft/client.py +68 -0
  656. synth_ai/learning/sft/config.py +270 -0
  657. synth_ai/learning/sft/data.py +698 -0
  658. synth_ai/learning/sse.py +25 -26
  659. synth_ai/learning/validators.py +29 -25
  660. synth_ai/mcp/__init__.py +5 -0
  661. synth_ai/mcp/__main__.py +8 -0
  662. synth_ai/mcp/main.py +254 -0
  663. synth_ai/mcp/setup.py +100 -0
  664. synth_ai/modal.py +257 -0
  665. synth_ai/pricing/__init__.py +3 -0
  666. synth_ai/pricing/model_pricing.py +64 -0
  667. synth_ai/session/__init__.py +75 -0
  668. synth_ai/session/client.py +383 -0
  669. synth_ai/session/constants.py +63 -0
  670. synth_ai/session/exceptions.py +105 -0
  671. synth_ai/session/manager.py +139 -0
  672. synth_ai/session/models.py +89 -0
  673. synth_ai/session/query.py +110 -0
  674. synth_ai/spec/__init__.py +46 -0
  675. synth_ai/spec/dataclasses.py +149 -0
  676. synth_ai/spec/loader.py +144 -0
  677. synth_ai/spec/serializer.py +199 -0
  678. synth_ai/spec/validation.py +250 -0
  679. synth_ai/streaming/__init__.py +29 -0
  680. synth_ai/streaming/config.py +94 -0
  681. synth_ai/streaming/handlers.py +589 -0
  682. synth_ai/streaming/streamer.py +320 -0
  683. synth_ai/streaming/types.py +95 -0
  684. synth_ai/task/__init__.py +50 -30
  685. synth_ai/task/apps/__init__.py +63 -19
  686. synth_ai/task/auth.py +35 -23
  687. synth_ai/task/client.py +15 -13
  688. synth_ai/task/config.py +261 -0
  689. synth_ai/task/contracts.py +165 -64
  690. synth_ai/task/datasets.py +9 -6
  691. synth_ai/task/errors.py +11 -10
  692. synth_ai/task/health.py +17 -11
  693. synth_ai/task/inference_api.py +101 -0
  694. synth_ai/task/json.py +58 -24
  695. synth_ai/task/proxy.py +59 -66
  696. synth_ai/task/rubrics/__init__.py +55 -0
  697. synth_ai/task/rubrics/loaders.py +156 -0
  698. synth_ai/task/rubrics/models.py +57 -0
  699. synth_ai/task/rubrics/scoring.py +116 -0
  700. synth_ai/task/rubrics/strict.py +149 -0
  701. synth_ai/task/rubrics.py +22 -15
  702. synth_ai/task/server.py +65 -31
  703. synth_ai/task/trace_correlation_helpers.py +328 -0
  704. synth_ai/task/tracing_utils.py +44 -28
  705. synth_ai/task/validators.py +449 -6
  706. synth_ai/task/vendors.py +5 -7
  707. synth_ai/tracing_v3/__init__.py +4 -0
  708. synth_ai/tracing_v3/abstractions.py +21 -4
  709. synth_ai/tracing_v3/config.py +167 -22
  710. synth_ai/tracing_v3/constants.py +21 -0
  711. synth_ai/tracing_v3/db_config.py +42 -29
  712. synth_ai/tracing_v3/decorators.py +80 -45
  713. synth_ai/tracing_v3/examples/basic_usage.py +15 -9
  714. synth_ai/tracing_v3/hooks.py +6 -4
  715. synth_ai/tracing_v3/llm_call_record_helpers.py +161 -61
  716. synth_ai/tracing_v3/migration_helper.py +1 -2
  717. synth_ai/tracing_v3/replica_sync.py +12 -7
  718. synth_ai/tracing_v3/serialization.py +130 -0
  719. synth_ai/tracing_v3/session_tracer.py +73 -16
  720. synth_ai/tracing_v3/storage/base.py +89 -1
  721. synth_ai/tracing_v3/storage/config.py +63 -16
  722. synth_ai/tracing_v3/storage/factory.py +11 -9
  723. synth_ai/tracing_v3/storage/utils.py +15 -11
  724. synth_ai/tracing_v3/trace_utils.py +317 -0
  725. synth_ai/tracing_v3/turso/__init__.py +8 -21
  726. synth_ai/tracing_v3/turso/daemon.py +123 -15
  727. synth_ai/tracing_v3/turso/models.py +5 -2
  728. synth_ai/tracing_v3/turso/native_manager.py +1293 -0
  729. synth_ai/tracing_v3/utils.py +5 -4
  730. synth_ai/tunnel.py +143 -0
  731. synth_ai/tunnel_deploy.py +278 -0
  732. synth_ai/types.py +8 -0
  733. synth_ai/urls.py +11 -0
  734. synth_ai/utils/__init__.py +166 -0
  735. synth_ai/utils/agents.py +74 -0
  736. synth_ai/utils/apps.py +152 -0
  737. synth_ai/utils/base_url.py +94 -0
  738. synth_ai/utils/bin.py +39 -0
  739. synth_ai/utils/claude.py +36 -0
  740. synth_ai/utils/cli.py +284 -0
  741. synth_ai/utils/config.py +81 -0
  742. synth_ai/utils/env.py +346 -0
  743. synth_ai/utils/errors.py +85 -0
  744. synth_ai/utils/http.py +172 -0
  745. synth_ai/utils/json.py +72 -0
  746. synth_ai/utils/log_filter.py +99 -0
  747. synth_ai/utils/logging.py +198 -0
  748. synth_ai/utils/modal.py +299 -0
  749. synth_ai/utils/paths.py +95 -0
  750. synth_ai/utils/process.py +233 -0
  751. synth_ai/utils/prompts.py +39 -0
  752. synth_ai/utils/sqld.py +122 -0
  753. synth_ai/utils/ssl.py +25 -0
  754. synth_ai/utils/task_app_discovery.py +882 -0
  755. synth_ai/utils/task_app_env.py +186 -0
  756. synth_ai/utils/task_app_state.py +318 -0
  757. synth_ai/utils/tunnel/__init__.py +12 -0
  758. synth_ai/utils/tunnel/config.py +55 -0
  759. synth_ai/utils/user_config.py +137 -0
  760. synth_ai/uvicorn.py +77 -0
  761. synth_ai-0.2.23.dev3.dist-info/METADATA +357 -0
  762. synth_ai-0.2.23.dev3.dist-info/RECORD +983 -0
  763. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/entry_points.txt +0 -1
  764. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/top_level.txt +1 -0
  765. synth_ai/cli/man.py +0 -106
  766. synth_ai/core/experiment.py +0 -15
  767. synth_ai/core/system.py +0 -15
  768. synth_ai/demo_registry.py +0 -258
  769. synth_ai/environments/examples/sokoban/units/astar_common.py +0 -95
  770. synth_ai/experimental/synth_oss.py +0 -446
  771. synth_ai/handshake.py +0 -107
  772. synth_ai/install_sqld.sh +0 -40
  773. synth_ai/learning/offline/dpo.py +0 -0
  774. synth_ai/learning/offline/providers.py +0 -7
  775. synth_ai/learning/offline/sft.py +0 -0
  776. synth_ai/learning/offline/shared.py +0 -0
  777. synth_ai/learning/online/grpo.py +0 -0
  778. synth_ai/learning/online/irft.py +0 -0
  779. synth_ai/learning/prompts/banking77_injection_eval.py +0 -168
  780. synth_ai/learning/prompts/gepa.py +0 -0
  781. synth_ai/learning/prompts/hello_world_in_context_injection_ex.py +0 -213
  782. synth_ai/learning/prompts/mipro.py +0 -289
  783. synth_ai/learning/prompts/random_search.py +0 -246
  784. synth_ai/learning/prompts/run_mipro_banking77.py +0 -172
  785. synth_ai/learning/prompts/run_random_search_banking77.py +0 -324
  786. synth_ai/lm/__init__.py +0 -51
  787. synth_ai/lm/caching/constants.py +0 -6
  788. synth_ai/lm/caching/dbs.py +0 -0
  789. synth_ai/lm/caching/ephemeral.py +0 -102
  790. synth_ai/lm/caching/handler.py +0 -137
  791. synth_ai/lm/caching/initialize.py +0 -11
  792. synth_ai/lm/caching/persistent.py +0 -114
  793. synth_ai/lm/config.py +0 -110
  794. synth_ai/lm/constants.py +0 -32
  795. synth_ai/lm/core/__init__.py +0 -8
  796. synth_ai/lm/core/all.py +0 -73
  797. synth_ai/lm/core/exceptions.py +0 -7
  798. synth_ai/lm/core/main.py +0 -319
  799. synth_ai/lm/core/main_v3.py +0 -594
  800. synth_ai/lm/core/synth_models.py +0 -48
  801. synth_ai/lm/core/vendor_clients.py +0 -188
  802. synth_ai/lm/cost/monitor.py +0 -1
  803. synth_ai/lm/cost/statefulness.py +0 -1
  804. synth_ai/lm/injection.py +0 -80
  805. synth_ai/lm/overrides.py +0 -206
  806. synth_ai/lm/provider_support/__init__.py +0 -8
  807. synth_ai/lm/provider_support/anthropic.py +0 -972
  808. synth_ai/lm/provider_support/openai.py +0 -1139
  809. synth_ai/lm/provider_support/suppress_logging.py +0 -31
  810. synth_ai/lm/structured_outputs/handler.py +0 -440
  811. synth_ai/lm/structured_outputs/inject.py +0 -297
  812. synth_ai/lm/structured_outputs/rehabilitate.py +0 -185
  813. synth_ai/lm/tools/__init__.py +0 -3
  814. synth_ai/lm/tools/base.py +0 -172
  815. synth_ai/lm/unified_interface.py +0 -202
  816. synth_ai/lm/vendors/base.py +0 -81
  817. synth_ai/lm/vendors/core/anthropic_api.py +0 -387
  818. synth_ai/lm/vendors/core/gemini_api.py +0 -292
  819. synth_ai/lm/vendors/core/mistral_api.py +0 -322
  820. synth_ai/lm/vendors/core/openai_api.py +0 -225
  821. synth_ai/lm/vendors/core/synth_dev_api.py +0 -0
  822. synth_ai/lm/vendors/local/ollama.py +0 -0
  823. synth_ai/lm/vendors/openai_standard.py +0 -780
  824. synth_ai/lm/vendors/openai_standard_responses.py +0 -256
  825. synth_ai/lm/vendors/retries.py +0 -22
  826. synth_ai/lm/vendors/supported/custom_endpoint.py +0 -417
  827. synth_ai/lm/vendors/supported/deepseek.py +0 -69
  828. synth_ai/lm/vendors/supported/grok.py +0 -75
  829. synth_ai/lm/vendors/supported/groq.py +0 -16
  830. synth_ai/lm/vendors/supported/ollama.py +0 -15
  831. synth_ai/lm/vendors/supported/openrouter.py +0 -74
  832. synth_ai/lm/vendors/supported/together.py +0 -11
  833. synth_ai/lm/vendors/synth_client.py +0 -808
  834. synth_ai/lm/warmup.py +0 -186
  835. synth_ai/rl/secrets.py +0 -19
  836. synth_ai/scripts/verify_rewards.py +0 -100
  837. synth_ai/task/apps/grpo_crafter.py +0 -438
  838. synth_ai/tracing/__init__.py +0 -30
  839. synth_ai/tracing_v1/__init__.py +0 -33
  840. synth_ai/tracing_v3/turso/manager.py +0 -774
  841. synth_ai/v0/tracing/abstractions.py +0 -224
  842. synth_ai/v0/tracing/base_client.py +0 -91
  843. synth_ai/v0/tracing/client_manager.py +0 -131
  844. synth_ai/v0/tracing/config.py +0 -142
  845. synth_ai/v0/tracing/context.py +0 -146
  846. synth_ai/v0/tracing/decorators.py +0 -682
  847. synth_ai/v0/tracing/events/__init__.py +0 -0
  848. synth_ai/v0/tracing/events/manage.py +0 -147
  849. synth_ai/v0/tracing/events/scope.py +0 -86
  850. synth_ai/v0/tracing/events/store.py +0 -228
  851. synth_ai/v0/tracing/immediate_client.py +0 -151
  852. synth_ai/v0/tracing/local.py +0 -18
  853. synth_ai/v0/tracing/log_client_base.py +0 -73
  854. synth_ai/v0/tracing/retry_queue.py +0 -186
  855. synth_ai/v0/tracing/trackers.py +0 -515
  856. synth_ai/v0/tracing/upload.py +0 -512
  857. synth_ai/v0/tracing/utils.py +0 -9
  858. synth_ai/v0/tracing_v1/__init__.py +0 -16
  859. synth_ai/v0/tracing_v1/abstractions.py +0 -224
  860. synth_ai/v0/tracing_v1/base_client.py +0 -91
  861. synth_ai/v0/tracing_v1/client_manager.py +0 -131
  862. synth_ai/v0/tracing_v1/config.py +0 -142
  863. synth_ai/v0/tracing_v1/context.py +0 -146
  864. synth_ai/v0/tracing_v1/decorators.py +0 -703
  865. synth_ai/v0/tracing_v1/events/__init__.py +0 -0
  866. synth_ai/v0/tracing_v1/events/manage.py +0 -147
  867. synth_ai/v0/tracing_v1/events/scope.py +0 -86
  868. synth_ai/v0/tracing_v1/events/store.py +0 -228
  869. synth_ai/v0/tracing_v1/immediate_client.py +0 -151
  870. synth_ai/v0/tracing_v1/local.py +0 -18
  871. synth_ai/v0/tracing_v1/log_client_base.py +0 -73
  872. synth_ai/v0/tracing_v1/retry_queue.py +0 -186
  873. synth_ai/v0/tracing_v1/trackers.py +0 -515
  874. synth_ai/v0/tracing_v1/upload.py +0 -527
  875. synth_ai/v0/tracing_v1/utils.py +0 -9
  876. synth_ai/zyk/__init__.py +0 -30
  877. synth_ai-0.2.9.dev0.dist-info/METADATA +0 -131
  878. synth_ai-0.2.9.dev0.dist-info/RECORD +0 -444
  879. {synth_ai/lm/caching → examples/task_apps}/__init__.py +0 -0
  880. {synth_ai/lm/cost → examples/task_apps/crafter}/__init__.py +0 -0
  881. {synth_ai/lm/structured_outputs → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/server}/__init__.py +0 -0
  882. {synth_ai/lm/vendors → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/tests}/__init__.py +0 -0
  883. {synth_ai/lm/vendors/core → examples/task_apps/dev/pokemon_emerald/external/pokeagent-speedrun/utils}/__init__.py +0 -0
  884. {synth_ai/lm/vendors/local → examples/task_apps/math}/__init__.py +0 -0
  885. {synth_ai/lm/vendors/supported → examples/workflows}/__init__.py +0 -0
  886. {synth_ai/v0/tracing → examples/workflows/math_rl}/__init__.py +0 -0
  887. /synth_ai/{compound/cais.py → cli/__main__.py} +0 -0
  888. /synth_ai/{learning/filtering.py → py.typed} +0 -0
  889. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/WHEEL +0 -0
  890. {synth_ai-0.2.9.dev0.dist-info → synth_ai-0.2.23.dev3.dist-info}/licenses/LICENSE +0 -0
@@ -1,1139 +0,0 @@
1
- import copy
2
- import logging
3
- import types
4
- from collections import defaultdict
5
- from dataclasses import dataclass
6
- from inspect import isclass
7
-
8
- import openai.resources
9
- from langfuse import Langfuse
10
- from langfuse.client import StatefulGenerationClient
11
- from langfuse.decorators import langfuse_context
12
- from langfuse.utils import _get_timestamp
13
- from langfuse.utils.langfuse_singleton import LangfuseSingleton
14
- from packaging.version import Version
15
- from pydantic import BaseModel
16
- from wrapt import wrap_function_wrapper
17
-
18
- from synth_ai.lm.overrides import (
19
- apply_injection as apply_injection_overrides,
20
- )
21
- from synth_ai.lm.overrides import (
22
- apply_param_overrides,
23
- apply_tool_overrides,
24
- use_overrides_for_messages,
25
- )
26
- from synth_ai.lm.provider_support.suppress_logging import *
27
- from synth_ai.tracing_v1.abstractions import MessageInputs
28
- from synth_ai.tracing_v1.trackers import synth_tracker_async, synth_tracker_sync
29
-
30
- try:
31
- import openai
32
- except ImportError as err:
33
- raise ModuleNotFoundError(
34
- "Please install OpenAI to use this feature: 'pip install openai'"
35
- ) from err
36
-
37
- # CREDIT TO LANGFUSE FOR OPEN-SOURCING THE CODE THAT THIS IS BASED ON
38
- # USING WITH MIT LICENSE PERMISSION
39
- # https://langfuse.com
40
-
41
- try:
42
- from openai import AsyncAzureOpenAI, AsyncOpenAI, AzureOpenAI, OpenAI # noqa: F401
43
- except ImportError:
44
- AsyncAzureOpenAI = None
45
- AsyncOpenAI = None
46
- AzureOpenAI = None
47
- OpenAI = None
48
-
49
-
50
- # log = logging.getLogger("langfuse")
51
-
52
- # Add logger configuration
53
- logger = logging.getLogger(__name__)
54
- logger.setLevel(logging.DEBUG) # Set to DEBUG to see all messages
55
-
56
-
57
- @dataclass
58
- class OpenAiDefinition:
59
- module: str
60
- object: str
61
- method: str
62
- type: str
63
- sync: bool
64
- min_version: str | None = None
65
-
66
-
67
- OPENAI_METHODS_V0 = [
68
- OpenAiDefinition(
69
- module="openai",
70
- object="ChatCompletion",
71
- method="create",
72
- type="chat",
73
- sync=True,
74
- ),
75
- OpenAiDefinition(
76
- module="openai",
77
- object="Completion",
78
- method="create",
79
- type="completion",
80
- sync=True,
81
- ),
82
- ]
83
-
84
-
85
- OPENAI_METHODS_V1 = [
86
- OpenAiDefinition(
87
- module="openai.resources.chat.completions",
88
- object="Completions",
89
- method="create",
90
- type="chat",
91
- sync=True,
92
- ),
93
- OpenAiDefinition(
94
- module="openai.resources.completions",
95
- object="Completions",
96
- method="create",
97
- type="completion",
98
- sync=True,
99
- ),
100
- OpenAiDefinition(
101
- module="openai.resources.chat.completions",
102
- object="AsyncCompletions",
103
- method="create",
104
- type="chat",
105
- sync=False,
106
- ),
107
- OpenAiDefinition(
108
- module="openai.resources.completions",
109
- object="AsyncCompletions",
110
- method="create",
111
- type="completion",
112
- sync=False,
113
- ),
114
- OpenAiDefinition(
115
- module="openai.resources.chat.completions",
116
- object="Completions",
117
- method="parse",
118
- type="chat",
119
- sync=True,
120
- min_version="1.50.0",
121
- ),
122
- OpenAiDefinition(
123
- module="openai.resources.chat.completions",
124
- object="AsyncCompletions",
125
- method="parse",
126
- type="chat",
127
- sync=False,
128
- min_version="1.50.0",
129
- ),
130
- ]
131
-
132
-
133
- class OpenAiArgsExtractor:
134
- def __init__(
135
- self,
136
- name=None,
137
- metadata=None,
138
- trace_id=None,
139
- session_id=None,
140
- user_id=None,
141
- tags=None,
142
- parent_observation_id=None,
143
- langfuse_prompt=None, # we cannot use prompt because it's an argument of the old OpenAI completions API
144
- **kwargs,
145
- ):
146
- # logger.debug(f"OpenAiArgsExtractor initialized with kwargs: {kwargs}")
147
- # raise NotImplementedError("This method is not implemented yet")
148
- self.args = {}
149
- self.args["name"] = name
150
- self.args["metadata"] = (
151
- metadata
152
- if "response_format" not in kwargs
153
- else {
154
- **(metadata or {}),
155
- "response_format": kwargs["response_format"].model_json_schema()
156
- if isclass(kwargs["response_format"])
157
- and issubclass(kwargs["response_format"], BaseModel)
158
- else kwargs["response_format"],
159
- }
160
- )
161
- self.args["trace_id"] = trace_id
162
- self.args["session_id"] = session_id
163
- self.args["user_id"] = user_id
164
- self.args["tags"] = tags
165
- self.args["parent_observation_id"] = parent_observation_id
166
- self.args["langfuse_prompt"] = langfuse_prompt
167
- self.kwargs = kwargs
168
-
169
- def get_langfuse_args(self):
170
- return {**self.args, **self.kwargs}
171
-
172
- def get_openai_args(self):
173
- return self.kwargs
174
-
175
-
176
- def _langfuse_wrapper(func):
177
- def _with_langfuse(open_ai_definitions, initialize):
178
- def wrapper(wrapped, instance, args, kwargs):
179
- return func(open_ai_definitions, initialize, wrapped, args, kwargs)
180
-
181
- return wrapper
182
-
183
- return _with_langfuse
184
-
185
-
186
- def _extract_chat_prompt(kwargs: dict):
187
- """
188
- Extracts the user input from prompts. Returns an array of messages or a dict with messages and functions.
189
- """
190
- prompt = {}
191
-
192
- if kwargs.get("functions") is not None:
193
- prompt.update({"functions": kwargs["functions"]})
194
-
195
- if kwargs.get("function_call") is not None:
196
- prompt.update({"function_call": kwargs["function_call"]})
197
-
198
- if kwargs.get("tools") is not None:
199
- prompt.update({"tools": kwargs["tools"]})
200
-
201
- # existing logic to handle the case when prompt is not empty
202
- if prompt:
203
- messages = _filter_image_data(kwargs.get("messages", []))
204
- prompt.update({"messages": messages})
205
- return prompt
206
- else:
207
- # fallback: just return filtered messages
208
- messages = _filter_image_data(kwargs.get("messages", []))
209
- return messages
210
-
211
-
212
- def _extract_chat_response(kwargs: dict):
213
- """
214
- Extracts the LLM output from the response.
215
- """
216
- response = {
217
- "role": kwargs.get("role"),
218
- }
219
-
220
- if kwargs.get("function_call") is not None:
221
- response.update({"function_call": kwargs["function_call"]})
222
-
223
- if kwargs.get("tool_calls") is not None:
224
- response.update({"tool_calls": kwargs["tool_calls"]})
225
-
226
- response["content"] = kwargs.get("content")
227
- return response
228
-
229
-
230
- def _get_langfuse_data_from_kwargs(
231
- resource: OpenAiDefinition, langfuse: Langfuse, start_time, kwargs
232
- ):
233
- # print("DEBUG: Entering _get_langfuse_data_from_kwargs")
234
- # print("DEBUG: kwargs received:", kwargs)
235
-
236
- name = kwargs.get("name", "OpenAI-generation")
237
- # print("DEBUG: name =", name)
238
- if name is None:
239
- name = "OpenAI-generation"
240
-
241
- if name is not None and not isinstance(name, str):
242
- raise TypeError("name must be a string")
243
-
244
- decorator_context_observation_id = langfuse_context.get_current_observation_id()
245
- decorator_context_trace_id = langfuse_context.get_current_trace_id()
246
- # print("DEBUG: decorator_context_observation_id =", decorator_context_observation_id)
247
- # print("DEBUG: decorator_context_trace_id =", decorator_context_trace_id)
248
-
249
- trace_id = kwargs.get("trace_id", None) or decorator_context_trace_id
250
- # print("DEBUG: trace_id =", trace_id)
251
- if trace_id is not None and not isinstance(trace_id, str):
252
- raise TypeError("trace_id must be a string")
253
-
254
- session_id = kwargs.get("session_id", None)
255
- # print("DEBUG: session_id =", session_id)
256
- if session_id is not None and not isinstance(session_id, str):
257
- raise TypeError("session_id must be a string")
258
-
259
- user_id = kwargs.get("user_id", None)
260
- # print("DEBUG: user_id =", user_id)
261
- if user_id is not None and not isinstance(user_id, str):
262
- raise TypeError("user_id must be a string")
263
-
264
- tags = kwargs.get("tags", None)
265
- # print("DEBUG: tags =", tags)
266
- if tags is not None and (
267
- not isinstance(tags, list) or not all(isinstance(tag, str) for tag in tags)
268
- ):
269
- raise TypeError("tags must be a list of strings")
270
-
271
- if decorator_context_trace_id:
272
- langfuse_context.update_current_trace(session_id=session_id, user_id=user_id, tags=tags)
273
-
274
- parent_observation_id = kwargs.get("parent_observation_id", None) or (
275
- decorator_context_observation_id
276
- if decorator_context_observation_id != decorator_context_trace_id
277
- else None
278
- )
279
- # print("DEBUG: parent_observation_id =", parent_observation_id)
280
- if parent_observation_id is not None and not isinstance(parent_observation_id, str):
281
- raise TypeError("parent_observation_id must be a string")
282
- if parent_observation_id is not None and trace_id is None:
283
- raise ValueError("parent_observation_id requires trace_id to be set")
284
-
285
- metadata = kwargs.get("metadata", {})
286
- # print("DEBUG: metadata =", metadata)
287
- if metadata is not None and not isinstance(metadata, dict):
288
- raise TypeError("metadata must be a dictionary")
289
-
290
- prompt = None
291
- if resource.type == "completion":
292
- prompt = kwargs.get("prompt", None)
293
- elif resource.type == "chat":
294
- prompt = _extract_chat_prompt(kwargs)
295
- # Extract model: first check top-level, then check inside 'inputs'
296
- model = kwargs.get("model", None)
297
- inputs = kwargs.get("inputs", {}) if kwargs.get("inputs", {}) else {}
298
- if isinstance(inputs, dict):
299
- # print("DEBUG: inputs =", inputs)
300
- if "model_name" in inputs:
301
- detailed_model = inputs["model_name"]
302
- print("DEBUG: detailed_model =", detailed_model)
303
- # If a detailed_model exists and is different from the top-level model, use it.
304
- if detailed_model and (not model or model != detailed_model):
305
- print("DEBUG: Upgrading model value from", model, "to", detailed_model)
306
- model = detailed_model
307
- # print("DEBUG: final model =", model)
308
-
309
- # Extract model hyperparameters and add them to the new field 'model_params'
310
- model_params = {
311
- "temperature": kwargs.get("temperature", 1),
312
- "max_tokens": kwargs.get("max_tokens", float("inf")),
313
- "top_p": kwargs.get("top_p", 1),
314
- "frequency_penalty": kwargs.get("frequency_penalty", 0),
315
- "presence_penalty": kwargs.get("presence_penalty", 0),
316
- }
317
- if kwargs.get("seed", None) is not None:
318
- model_params["seed"] = kwargs.get("seed", None)
319
-
320
- is_nested_trace = False
321
- if trace_id:
322
- is_nested_trace = True
323
- langfuse.trace(id=trace_id, session_id=session_id, user_id=user_id, tags=tags)
324
- else:
325
- trace_instance = langfuse.trace(
326
- session_id=session_id,
327
- user_id=user_id,
328
- tags=tags,
329
- name=name,
330
- input=prompt,
331
- metadata=metadata,
332
- )
333
- trace_id = trace_instance.id
334
- # print("DEBUG: Generated new trace_id =", trace_id)
335
-
336
- langfuse_prompt = kwargs.get("langfuse_prompt", None)
337
-
338
- extracted_data = {
339
- "name": name,
340
- "metadata": metadata,
341
- "trace_id": trace_id,
342
- "parent_observation_id": parent_observation_id,
343
- "user_id": user_id,
344
- "start_time": start_time,
345
- "input": prompt,
346
- "model_params": {
347
- "model_name": model or None,
348
- "temperature": kwargs.get("temperature", 1),
349
- "max_tokens": kwargs.get("max_tokens", float("inf")),
350
- "top_p": kwargs.get("top_p", 1),
351
- "frequency_penalty": kwargs.get("frequency_penalty", 0),
352
- "presence_penalty": kwargs.get("presence_penalty", 0),
353
- },
354
- "prompt": langfuse_prompt,
355
- }
356
-
357
- # Add seed to model_params if present
358
- if kwargs.get("seed", None) is not None:
359
- extracted_data["model_params"]["seed"] = kwargs.get("seed", None)
360
-
361
- # print("DEBUG: Exiting _get_langfuse_data_from_kwargs with extracted_data:")
362
- # print(extracted_data)
363
- # print("DEBUG: is_nested_trace =", is_nested_trace)
364
-
365
- return extracted_data, is_nested_trace
366
-
367
-
368
def _create_langfuse_update(
    completion,
    generation: StatefulGenerationClient,
    completion_start_time,
    model=None,
    usage=None,
    model_params=None,
):
    """Push the final state of a finished generation to Langfuse.

    Args:
        completion: Output to record for the generation.
        generation: Client whose ``update`` method receives the payload.
        completion_start_time: Timestamp of the first received chunk, or
            ``None`` when no chunk was seen.
        model: Model name; falsy values are reported as ``None``.
        usage: Usage data; only forwarded when it is not ``None``.
        model_params: Optional mapping of hyperparameters that is merged
            into the reported ``model_params`` next to ``model_name``.
    """
    update = {
        "end_time": _get_timestamp(),
        "output": completion,
        "completion_start_time": completion_start_time,
    }

    # Build the payload under its own name. The previous code rebound
    # ``model_params`` to a fresh dict and then merged that dict with itself,
    # which silently dropped every hyperparameter supplied by the caller.
    reported_params = {"model_name": model or None}
    if model_params:
        reported_params.update(model_params)
    update["model_params"] = reported_params

    if usage is not None:
        update["usage"] = usage

    generation.update(**update)
398
-
399
-
400
def _extract_streamed_openai_response(resource, chunks):
    """Summarise the chunks of a streamed OpenAI response.

    Args:
        resource: Definition of the wrapped endpoint; only ``resource.type``
            is read, to pick the shape of the returned completion.
        chunks: Chunks collected while the stream was consumed.

    Returns:
        A ``(model, completion, usage)`` tuple. ``model`` is the first truthy
        ``model`` value found in the chunks and ``usage`` is the last
        non-``None`` ``usage`` value (converted through ``__dict__`` on
        OpenAI v1).

    NOTE(review): the per-choice deltas are never folded into ``completion``,
    so it is always returned empty (an empty ``defaultdict(str)`` for chat,
    ``""`` otherwise). Confirm whether that is intentional.
    """
    completion = defaultdict(str) if resource.type == "chat" else ""
    model = None
    usage = None

    # The installed OpenAI version cannot change while iterating, so parse it
    # once instead of once (or twice) per chunk.
    is_v1 = _is_openai_v1()

    for chunk in chunks:
        if is_v1:
            chunk = chunk.__dict__

        # Keep the first model name that any chunk reports.
        model = model or chunk.get("model", None) or None

        chunk_usage = chunk.get("usage", None)
        if chunk_usage is not None:
            usage = chunk_usage.__dict__ if is_v1 else chunk_usage

    return model, completion, usage
428
-
429
-
430
def _get_langfuse_data_from_default_response(resource: OpenAiDefinition, response):
    """Extract model, completion and usage from a non-streamed response.

    Args:
        resource: Definition of the wrapped endpoint; ``resource.type``
            selects how the completion is read (``"completion"`` or
            ``"chat"``; any other type yields a ``None`` completion).
        response: Mapping-like response (callers pass ``__dict__`` of the
            response object on OpenAI v1), or ``None``.

    Returns:
        A ``(model, completion, usage)`` tuple. For a ``None`` response the
        completion is a fixed placeholder string and the other two are
        ``None``.
    """
    if response is None:
        return None, "<NoneType response returned from OpenAI>", None

    is_v1 = _is_openai_v1()
    model = response.get("model", None) or None

    # ``choices`` may be present but None; treat that like an empty list
    # instead of failing on len(None).
    choices = response.get("choices", None) or []

    completion = None
    if choices and resource.type in ("completion", "chat"):
        choice = choices[-1]  # only the last choice is reported
        if resource.type == "completion":
            completion = choice.text if is_v1 else choice.get("text", None)
        else:
            completion = (
                _extract_chat_response(choice.message.__dict__)
                if is_v1
                else choice.get("message", None)
            )

    usage = response.get("usage", None)
    if is_v1 and usage is not None:
        usage = usage.__dict__

    return model, completion, usage
460
-
461
-
462
def _is_openai_v1():
    """Return True when the installed ``openai`` package is version 1.0.0 or newer."""
    installed = Version(openai.__version__)
    first_v1_release = Version("1.0.0")
    return installed >= first_v1_release
464
-
465
-
466
- def _is_streaming_response(response):
467
- return (
468
- isinstance(response, types.GeneratorType)
469
- or isinstance(response, types.AsyncGeneratorType)
470
- or (_is_openai_v1() and isinstance(response, openai.Stream))
471
- or (_is_openai_v1() and isinstance(response, openai.AsyncStream))
472
- )
473
-
474
-
475
@_langfuse_wrapper
def _wrap(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs):
    """Run a synchronous OpenAI call and record it in Langfuse and the synth tracker.

    Args:
        open_ai_resource: Definition of the wrapped endpoint; its ``type``
            ("completion" or "chat") decides how messages are tracked.
        initialize: Callable returning the Langfuse client to use.
        wrapped: The original OpenAI method being wrapped.
        args: Positional arguments of the intercepted call.
        kwargs: Keyword arguments of the intercepted call.

    Returns:
        The OpenAI response, or a ``LangfuseResponseGeneratorSync`` wrapping
        it when the response is a stream.

    Raises:
        Exception: Whatever the wrapped call (or the bookkeeping inside the
            ``try`` block) raises; the generation is marked ``ERROR`` first.
    """
    new_langfuse: Langfuse = initialize()

    start_time = _get_timestamp()
    arg_extractor = OpenAiArgsExtractor(*args, **kwargs)

    def _model_params(model_name):
        # Single definition of the reported hyperparameters and their
        # defaults, shared by the success and the error path.
        return {
            "model_name": model_name or None,
            "temperature": kwargs.get("temperature", 1),
            "max_tokens": kwargs.get("max_tokens", float("inf")),
            "top_p": kwargs.get("top_p", 1),
            "frequency_penalty": kwargs.get("frequency_penalty", 0),
            "presence_penalty": kwargs.get("presence_penalty", 0),
        }

    def _add_seed(params):
        # The seed is only reported when the caller actually supplied one.
        seed = kwargs.get("seed", None)
        if seed is not None:
            params["seed"] = seed

    generation, is_nested_trace = _get_langfuse_data_from_kwargs(
        open_ai_resource, new_langfuse, start_time, arg_extractor.get_langfuse_args()
    )
    generation = new_langfuse.generation(**generation)
    try:
        openai_args = arg_extractor.get_openai_args()
        # Apply context-scoped injection to chat messages if present
        if isinstance(openai_args, dict) and "messages" in openai_args:
            try:
                with use_overrides_for_messages(openai_args["messages"]):  # type: ignore[arg-type]
                    openai_args["messages"] = apply_injection_overrides(openai_args["messages"])  # type: ignore[arg-type]
                    openai_args = apply_tool_overrides(openai_args)
                    openai_args = apply_param_overrides(openai_args)
            except Exception:
                # Overrides stay best-effort, but leave a trace of the failure
                # instead of swallowing it silently.
                logger.debug("Applying request overrides failed", exc_info=True)
        openai_response = wrapped(**openai_args)

        if _is_streaming_response(openai_response):
            return LangfuseResponseGeneratorSync(
                resource=open_ai_resource,
                response=openai_response,
                generation=generation,
                langfuse=new_langfuse,
                is_nested_trace=is_nested_trace,
                kwargs=arg_extractor.get_openai_args(),
            )

        model, completion, usage = _get_langfuse_data_from_default_response(
            open_ai_resource,
            (openai_response and openai_response.__dict__)
            if _is_openai_v1()
            else openai_response,
        )
        model_params = _model_params(model)

        if open_ai_resource.type == "completion":
            user_prompt = arg_extractor.get_openai_args().get("prompt", "")
            message_input = MessageInputs(messages=[{"role": "user", "content": user_prompt}])

            # Track user input
            synth_tracker_sync.track_lm(
                messages=message_input.messages,
                model_name=model,
                model_params=model_params,
                finetune=False,
            )

            # Track assistant output separately
            synth_tracker_sync.track_lm_output(
                messages=[{"role": "assistant", "content": completion}],
                model_name=model,
                model_params=model_params,
                finetune=False,
            )

        elif open_ai_resource.type == "chat":
            message_input = MessageInputs(messages=openai_args.get("messages", []))

            # Track user input
            synth_tracker_sync.track_lm(
                messages=message_input.messages,
                model_name=model,
                model_params=model_params,
                finetune=False,
            )

            # Track assistant output separately. The completion is None when
            # the response carried no choices; indexing it unconditionally
            # turned a successful call into an error. Same guard as the
            # streaming generators use.
            if isinstance(completion, dict) and "content" in completion:
                synth_tracker_sync.track_lm_output(
                    messages=[{"role": "assistant", "content": completion["content"]}],
                    model_name=model,
                    finetune=False,
                )

        _add_seed(model_params)

        generation.update(
            model_params=model_params,
            output=completion,
            end_time=_get_timestamp(),
            usage=usage,
        )

        # Avoiding the trace-update if trace-id is provided by user.
        if not is_nested_trace:
            new_langfuse.trace(id=generation.trace_id, output=completion)

        return openai_response
    except Exception as ex:
        error_params = _model_params(kwargs.get("model", None))
        _add_seed(error_params)

        generation.update(
            end_time=_get_timestamp(),
            status_message=str(ex),
            level="ERROR",
            model_params=error_params,
            usage={"input_cost": 0, "output_cost": 0, "total_cost": 0},
        )
        # Bare raise keeps the original traceback intact.
        raise
613
-
614
-
615
@_langfuse_wrapper
async def _wrap_async(open_ai_resource: OpenAiDefinition, initialize, wrapped, args, kwargs):
    """Run an asynchronous OpenAI call and record it in Langfuse and the synth tracker.

    Args:
        open_ai_resource: Definition of the wrapped endpoint; its ``type``
            ("completion" or "chat") decides how messages are tracked.
        initialize: Callable returning the Langfuse client to use.
        wrapped: The original (awaitable) OpenAI method being wrapped.
        args: Positional arguments of the intercepted call.
        kwargs: Keyword arguments of the intercepted call.

    Returns:
        The OpenAI response, or a ``LangfuseResponseGeneratorAsync`` wrapping
        it when the response is a stream.

    Raises:
        Exception: Whatever the wrapped call (or the bookkeeping inside the
            ``try`` block) raises; the generation is marked ``ERROR`` first.
    """
    new_langfuse = initialize()
    start_time = _get_timestamp()
    arg_extractor = OpenAiArgsExtractor(*args, **kwargs)

    def _model_params(model_name):
        # Single definition of the reported hyperparameters and their
        # defaults, shared by tracking, the Langfuse update and the error path.
        return {
            "model_name": model_name or None,
            "temperature": kwargs.get("temperature", 1),
            "max_tokens": kwargs.get("max_tokens", float("inf")),
            "top_p": kwargs.get("top_p", 1),
            "frequency_penalty": kwargs.get("frequency_penalty", 0),
            "presence_penalty": kwargs.get("presence_penalty", 0),
        }

    def _add_seed(params):
        # The seed is only reported when the caller actually supplied one.
        seed = kwargs.get("seed", None)
        if seed is not None:
            params["seed"] = seed

    generation, is_nested_trace = _get_langfuse_data_from_kwargs(
        open_ai_resource, new_langfuse, start_time, arg_extractor.get_langfuse_args()
    )
    generation = new_langfuse.generation(**generation)

    try:
        openai_args = arg_extractor.get_openai_args()
        # Apply context-scoped injection to chat messages if present
        if isinstance(openai_args, dict) and "messages" in openai_args:
            try:
                with use_overrides_for_messages(openai_args["messages"]):  # type: ignore[arg-type]
                    openai_args["messages"] = apply_injection_overrides(openai_args["messages"])  # type: ignore[arg-type]
                    openai_args = apply_tool_overrides(openai_args)
                    openai_args = apply_param_overrides(openai_args)
            except Exception:
                # Overrides stay best-effort, but leave a trace of the failure
                # instead of swallowing it silently.
                logger.debug("Applying request overrides failed", exc_info=True)
        openai_response = await wrapped(**openai_args)

        if _is_streaming_response(openai_response):
            return LangfuseResponseGeneratorAsync(
                resource=open_ai_resource,
                response=openai_response,
                generation=generation,
                langfuse=new_langfuse,
                is_nested_trace=is_nested_trace,
                kwargs=arg_extractor.get_openai_args(),
            )

        model, completion, usage = _get_langfuse_data_from_default_response(
            open_ai_resource,
            (openai_response and openai_response.__dict__)
            if _is_openai_v1()
            else openai_response,
        )
        tracker_params = _model_params(model)

        if open_ai_resource.type == "completion":
            user_prompt = arg_extractor.get_openai_args().get("prompt", "")
            message_input = MessageInputs(messages=[{"role": "user", "content": user_prompt}])

            # Track user input
            synth_tracker_async.track_lm(
                messages=message_input.messages,
                model_name=model,
                model_params=tracker_params,
                finetune=False,
            )

            # Track assistant output separately
            synth_tracker_async.track_lm_output(
                messages=[{"role": "assistant", "content": completion}],
                model_name=model,
                finetune=False,
            )

        elif open_ai_resource.type == "chat":
            message_input = MessageInputs(messages=openai_args.get("messages", []))

            # Track user input
            synth_tracker_async.track_lm(
                messages=message_input.messages,
                model_name=model,
                model_params=tracker_params,
                finetune=False,
            )

            # Track assistant output separately. The completion is None when
            # the response carried no choices; indexing it unconditionally
            # turned a successful call into an error. Same guard as the
            # streaming generators use.
            if isinstance(completion, dict) and "content" in completion:
                synth_tracker_async.track_lm_output(
                    messages=[{"role": "assistant", "content": completion["content"]}],
                    model_name=model,
                    finetune=False,
                )

        # Langfuse gets its own dict so the seed is not added to the one
        # already handed to the tracker.
        model_params = _model_params(model)
        _add_seed(model_params)

        generation.update(
            model_params=model_params,
            output=completion,
            end_time=_get_timestamp(),
            usage=usage,
        )
        # Avoiding the trace-update if trace-id is provided by user.
        if not is_nested_trace:
            new_langfuse.trace(id=generation.trace_id, output=completion)

        return openai_response
    except Exception as ex:
        error_params = _model_params(kwargs.get("model", None))
        _add_seed(error_params)

        generation.update(
            end_time=_get_timestamp(),
            status_message=str(ex),
            level="ERROR",
            model_params=error_params,
            usage={"input_cost": 0, "output_cost": 0, "total_cost": 0},
        )
        # Bare raise keeps the original traceback intact.
        raise

    # NOTE(review): every path above returns or raises, so this nested
    # coroutine is never defined at runtime. It looks like a leftover copy of
    # a response-wrapper ``close`` method; its nesting is inferred from the
    # surrounding layout - confirm and remove if it is indeed dead.
    async def close(self) -> None:
        """Close the response and release the connection.

        Automatically called if the response body is read to completion.
        """
        await self.response.close()
764
-
765
-
766
class OpenAILangfuse:
    """Installs Langfuse tracing wrappers on the OpenAI SDK and owns the client."""

    # Langfuse client; stays None until ``initialize`` has been called.
    _langfuse: Langfuse | None = None

    def initialize(self):
        """Fetch the Langfuse client configured through ``openai.langfuse_*`` and cache it.

        Returns:
            The client returned by ``LangfuseSingleton().get``.
        """
        self._langfuse = LangfuseSingleton().get(
            public_key=openai.langfuse_public_key,
            secret_key=openai.langfuse_secret_key,
            host=openai.langfuse_host,
            debug=openai.langfuse_debug,
            enabled=openai.langfuse_enabled,
            sdk_integration="openai",
            sample_rate=openai.langfuse_sample_rate,
        )

        return self._langfuse

    def flush(self):
        """Flush the Langfuse client; a no-op when no client was created yet."""
        # Previously this raised AttributeError on None when called before
        # the first traced request.
        if self._langfuse is not None:
            self._langfuse.flush()

    def langfuse_auth_check(self):
        """Check if the provided Langfuse credentials (public and secret key) are valid.

        Raises:
            Exception: If no projects were found for the provided credentials.

        Note:
            This method is blocking. It is discouraged to use it in prod code.
        """
        if self._langfuse is None:
            self.initialize()

        return self._langfuse.auth_check()

    def register_tracing(self):
        """Wrap every applicable OpenAI method and publish the ``openai.langfuse_*`` settings."""
        resources = OPENAI_METHODS_V1 if _is_openai_v1() else OPENAI_METHODS_V0
        installed_version = Version(openai.__version__)

        for resource in resources:
            if resource.min_version is not None and installed_version < Version(
                resource.min_version
            ):
                continue

            # Check if the method actually exists before trying to wrap it
            try:
                module = __import__(resource.module, fromlist=[resource.object])
            except (ImportError, AttributeError):
                continue  # Skip if module doesn't exist
            obj = getattr(module, resource.object, None)
            # A missing object must be skipped as well: ``getattr`` with a
            # default never raises, so the old ``obj and ...`` test let a
            # missing object fall through to the wrapping call.
            if obj is None or not hasattr(obj, resource.method):
                continue

            wrap_function_wrapper(
                resource.module,
                f"{resource.object}.{resource.method}",
                _wrap(resource, self.initialize)
                if resource.sync
                else _wrap_async(resource, self.initialize),
            )

        # Module-level configuration hooks read by ``initialize``.
        openai.langfuse_public_key = None
        openai.langfuse_secret_key = None
        openai.langfuse_host = None
        openai.langfuse_debug = None
        openai.langfuse_enabled = True
        openai.langfuse_sample_rate = None
        openai.langfuse_mask = None
        openai.langfuse_auth_check = self.langfuse_auth_check
        openai.flush_langfuse = self.flush
834
-
835
-
836
# Import-time side effect: create the module-wide integration object and
# install the tracing wrappers on the OpenAI SDK right away.
modifier = OpenAILangfuse()
modifier.register_tracing()
838
-
839
-
840
# DEPRECATED: Use `openai.langfuse_auth_check()` instead
def auth_check():
    """Validate the Langfuse credentials, creating the client first if needed."""
    client = modifier._langfuse
    if client is None:
        modifier.initialize()
        client = modifier._langfuse

    return client.auth_check()
846
-
847
-
848
- def _filter_image_data(messages: list[dict]):
849
- """https://platform.openai.com/docs/guides/vision?lang=python
850
-
851
- The messages array remains the same, but the 'image_url' is removed from the 'content' array.
852
- It should only be removed if the value starts with 'data:image/jpeg;base64,'
853
-
854
- """
855
- output_messages = copy.deepcopy(messages)
856
-
857
- for message in output_messages:
858
- content = (
859
- message.get("content", None)
860
- if isinstance(message, dict)
861
- else getattr(message, "content", None)
862
- )
863
-
864
- if content is not None:
865
- for index, item in enumerate(content):
866
- if isinstance(item, dict) and item.get("image_url", None) is not None:
867
- url = item["image_url"]["url"]
868
- if url.startswith("data:image/"):
869
- del content[index]["image_url"]
870
-
871
- return output_messages
872
-
873
-
874
class LangfuseResponseGeneratorSync:
    """Wraps a streamed OpenAI response, recording chunks and reporting once the stream ends.

    Every chunk handed to the consumer is also stored in ``items``. When the
    stream is exhausted (or iteration through ``__iter__`` stops), the
    collected data is sent to the synth tracker and to Langfuse.
    """

    def __init__(
        self,
        *,
        resource,
        response,
        generation,
        langfuse,
        is_nested_trace,
        kwargs,
    ):
        self.items = []
        self.resource = resource
        self.response = response
        self.generation = generation
        self.langfuse = langfuse
        self.is_nested_trace = is_nested_trace
        self.kwargs = kwargs
        # Set when the first chunk arrives.
        self.completion_start_time = None

    def __iter__(self):
        try:
            for i in self.response:
                self.items.append(i)

                if self.completion_start_time is None:
                    self.completion_start_time = _get_timestamp()

                yield i
        finally:
            # Runs on exhaustion as well as when iteration is abandoned.
            self._finalize()

    def __next__(self):
        try:
            item = self.response.__next__()
            self.items.append(item)

            if self.completion_start_time is None:
                self.completion_start_time = _get_timestamp()

            return item

        except StopIteration:
            self._finalize()

            raise

    def __enter__(self):
        return self.__iter__()

    def __exit__(self, exc_type, exc_value, traceback):
        pass

    def _finalize(self):
        """Report the collected stream to the synth tracker and to Langfuse."""
        logger.debug("Entering _finalize() in LangfuseResponseGeneratorSync...")
        # First, extract values from the streamed response items
        model, completion, usage = _extract_streamed_openai_response(self.resource, self.items)
        logger.debug("Extracted model=%s, completion=%s, usage=%s", model, completion, usage)

        # Look through the streamed items for a detailed model in the additional "inputs"
        for item in self.items:
            if isinstance(item, dict):
                inputs = item.get("inputs")
                if isinstance(inputs, dict):
                    detailed = inputs.get("model_name")
                    if detailed and detailed != model:
                        logger.debug(
                            "Upgrading model value from %s to %s based on streamed inputs",
                            model,
                            detailed,
                        )
                        model = detailed
                        break
        logger.debug("Final model after _finalize check: %s", model)

        # Create model hyperparameters dictionary
        model_params = {
            "temperature": self.kwargs.get("temperature", 1),
            "max_tokens": self.kwargs.get("max_tokens", float("inf")),
            "top_p": self.kwargs.get("top_p", 1),
            "frequency_penalty": self.kwargs.get("frequency_penalty", 0),
            "presence_penalty": self.kwargs.get("presence_penalty", 0),
        }
        if self.kwargs.get("seed") is not None:
            model_params["seed"] = self.kwargs.get("seed")

        if self.resource.type == "completion":
            user_prompt = self.kwargs.get("prompt", "")
            messages = [
                {"role": "user", "content": user_prompt},
                {"role": "assistant", "content": completion},
            ]
            message_input = MessageInputs(messages=messages)
        elif self.resource.type == "chat":
            # Work on a copy: the list in kwargs is the one the request was
            # built from, and appending to it in place would alter the
            # caller's conversation.
            messages = list(self.kwargs.get("messages") or [])
            logger.debug("Existing 'messages' from kwargs before appending: %s", messages)
            if isinstance(completion, dict) and "content" in completion:
                messages.append({"role": "assistant", "content": completion["content"]})
            message_input = MessageInputs(messages=messages)
            logger.debug("Final 'messages': %s", message_input.messages)
        else:
            message_input = MessageInputs(messages=[])

        logger.debug(
            "Calling track_lm (sync) with messages: %s, model: %s",
            message_input.messages,
            model,
        )
        synth_tracker_sync.track_lm(
            messages=message_input.messages,
            model_name=model,
            model_params=model_params,
            finetune=False,
        )

        # Avoid the trace update if a trace-id was provided by the user.
        if not self.is_nested_trace:
            self.langfuse.trace(id=self.generation.trace_id, output=completion)

        # Pass the updated model and hyperparameters downstream in the update event.
        _create_langfuse_update(
            completion,
            self.generation,
            self.completion_start_time,
            model=model,
            usage=usage,
            model_params=model_params,
        )
1002
-
1003
-
1004
class LangfuseResponseGeneratorAsync:
    """Wraps an async streamed OpenAI response, recording chunks and reporting once the stream ends.

    Every chunk handed to the consumer is also stored in ``items``. When the
    stream is exhausted (or iteration through ``__aiter__`` stops), the
    collected data is sent to the synth tracker and to Langfuse.
    """

    def __init__(
        self,
        *,
        resource,
        response,
        generation,
        langfuse,
        is_nested_trace,
        kwargs,
    ):
        self.items = []
        self.resource = resource
        self.response = response
        self.generation = generation
        self.langfuse = langfuse
        self.is_nested_trace = is_nested_trace
        self.kwargs = kwargs
        # Set when the first chunk arrives.
        self.completion_start_time = None

    async def __aiter__(self):
        try:
            async for i in self.response:
                self.items.append(i)

                if self.completion_start_time is None:
                    self.completion_start_time = _get_timestamp()

                yield i
        finally:
            # Runs on exhaustion as well as when iteration is abandoned.
            await self._finalize()

    async def __anext__(self):
        try:
            item = await self.response.__anext__()
            self.items.append(item)

            if self.completion_start_time is None:
                self.completion_start_time = _get_timestamp()

            return item

        except StopAsyncIteration:
            await self._finalize()

            raise

    async def __aenter__(self):
        return self.__aiter__()

    async def __aexit__(self, exc_type, exc_value, traceback):
        pass

    async def _finalize(self):
        """Report the collected stream to the synth tracker and to Langfuse."""
        logger.debug("Entering _finalize() in LangfuseResponseGeneratorAsync...")
        model, completion, usage = _extract_streamed_openai_response(self.resource, self.items)
        logger.debug("Extracted model=%s, completion=%s, usage=%s", model, completion, usage)

        # Look through the streamed items for a detailed model in the additional "inputs"
        for item in self.items:
            if isinstance(item, dict):
                inputs = item.get("inputs")
                if isinstance(inputs, dict):
                    detailed = inputs.get("model_name")
                    if detailed and detailed != model:
                        logger.debug(
                            "Upgrading model value from %s to %s based on streamed inputs",
                            model,
                            detailed,
                        )
                        model = detailed
                        break
        logger.debug("Final model after _finalize check: %s", model)

        # Create model hyperparameters dictionary
        model_params = {
            "temperature": self.kwargs.get("temperature", 1),
            "max_tokens": self.kwargs.get("max_tokens", float("inf")),
            "top_p": self.kwargs.get("top_p", 1),
            "frequency_penalty": self.kwargs.get("frequency_penalty", 0),
            "presence_penalty": self.kwargs.get("presence_penalty", 0),
        }
        if self.kwargs.get("seed") is not None:
            model_params["seed"] = self.kwargs.get("seed")

        if self.resource.type == "completion":
            user_prompt = self.kwargs.get("prompt", "")
            messages = [
                {"role": "user", "content": user_prompt},
                {"role": "assistant", "content": completion},
            ]
            message_input = MessageInputs(messages=messages)
        elif self.resource.type == "chat":
            # Work on a copy: the list in kwargs is the one the request was
            # built from, and appending to it in place would alter the
            # caller's conversation.
            messages = list(self.kwargs.get("messages") or [])
            logger.debug("Existing 'messages' from kwargs before appending: %s", messages)
            # If completion is a dict, ensure we extract 'content' safely
            if isinstance(completion, dict) and "content" in completion:
                messages.append({"role": "assistant", "content": completion["content"]})
            message_input = MessageInputs(messages=messages)
            logger.debug("Final 'messages': %s", message_input.messages)
        else:
            message_input = MessageInputs(messages=[])

        logger.debug(
            "Calling track_lm (async) with messages: %s, model: %s",
            message_input.messages,
            model,
        )
        synth_tracker_async.track_lm(
            messages=message_input.messages,
            model_name=model,
            model_params=model_params,
            finetune=False,
        )

        # Avoiding the trace-update if trace-id is provided by user.
        if not self.is_nested_trace:
            self.langfuse.trace(id=self.generation.trace_id, output=completion)

        _create_langfuse_update(
            completion,
            self.generation,
            self.completion_start_time,
            model=model,
            usage=usage,
            model_params=model_params,
        )

    async def close(self) -> None:
        """Close the response and release the connection.

        Automatically called if the response body is read to completion.
        """
        await self.response.close()