synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,301 @@
1
+ from __future__ import annotations
2
+
3
+ import contextlib
4
+ import random
5
+ from dataclasses import dataclass
6
+ from typing import Any, Iterable, Sequence
7
+
8
+ from synth_ai.http import AsyncHttpClient, sleep
9
+
10
+ from .config import StreamConfig
11
+ from .handlers import StreamHandler
12
+ from .types import StreamMessage, StreamType
13
+
14
# Job status strings that end the polling loop. Both spellings of
# "cancelled" are listed so either one is recognised.
TERMINAL_STATUSES = {
    "succeeded",
    "completed",
    "failed",
    "cancelled",
    "canceled",
}

# Event types that mark a job as finished successfully.
TERMINAL_EVENT_SUCCESS = {
    "sft.job.completed",
    "rl.job.completed",
    "rl.train.completed",
    "training.completed",
    "workflow.completed",
}

# Event types that mark a job as finished with a failure.
TERMINAL_EVENT_FAILURE = {
    "sft.job.failed",
    "rl.job.failed",
    "rl.train.failed",
    "training.failed",
    "workflow.failed",
}
29
+
30
+
31
+ @dataclass(slots=True)
32
+ class StreamEndpoints:
33
+ """Collection of endpoint paths (with optional fallbacks) to poll for a job."""
34
+
35
+ status: str | None
36
+ events: str | None = None
37
+ metrics: str | None = None
38
+ timeline: str | None = None
39
+ status_fallbacks: tuple[str, ...] = ()
40
+ event_fallbacks: tuple[str, ...] = ()
41
+ metric_fallbacks: tuple[str, ...] = ()
42
+ timeline_fallbacks: tuple[str, ...] = ()
43
+
44
+ @classmethod
45
+ def learning(cls, job_id: str) -> StreamEndpoints:
46
+ base = f"/learning/jobs/{job_id}"
47
+ return cls(
48
+ status=base,
49
+ events=f"{base}/events",
50
+ metrics=f"{base}/metrics",
51
+ timeline=f"{base}/timeline",
52
+ )
53
+
54
+ @classmethod
55
+ def rl(cls, job_id: str) -> StreamEndpoints:
56
+ base = f"/rl/jobs/{job_id}"
57
+ return cls(
58
+ status=base,
59
+ events=f"{base}/events",
60
+ metrics=f"{base}/metrics",
61
+ timeline=f"{base}/timeline",
62
+ status_fallbacks=(
63
+ f"/learning/jobs/{job_id}",
64
+ f"/orchestration/jobs/{job_id}",
65
+ ),
66
+ event_fallbacks=(
67
+ f"/learning/jobs/{job_id}/events",
68
+ f"/orchestration/jobs/{job_id}/events",
69
+ ),
70
+ metric_fallbacks=(
71
+ f"/learning/jobs/{job_id}/metrics",
72
+ ),
73
+ timeline_fallbacks=(
74
+ f"/learning/jobs/{job_id}/timeline",
75
+ ),
76
+ )
77
+
78
+
79
class JobStreamer:
    """Poll job endpoints and dispatch messages to configured handlers.

    On each polling round the streamer fetches the status, events, metrics and
    timeline endpoints that are enabled in ``config`` and forwards the
    resulting messages to every handler. Polling stops once a terminal status,
    terminal event or terminal timeline phase is observed, or once
    ``timeout_seconds`` has elapsed.
    """

    def __init__(
        self,
        *,
        base_url: str,
        api_key: str,
        job_id: str,
        endpoints: StreamEndpoints | None = None,
        config: StreamConfig | None = None,
        handlers: Sequence[StreamHandler] | None = None,
        interval_seconds: float = 2.0,
        timeout_seconds: float | None = None,
        http_timeout: float = 60.0,
        http_client: AsyncHttpClient | None = None,
        sleep_fn=sleep,
    ) -> None:
        """Configure the streamer.

        Args:
            base_url: Backend base URL; trailing slashes are stripped.
            api_key: Key passed to the HTTP client created by the streamer.
            job_id: Identifier of the job to follow.
            endpoints: Paths to poll; defaults to ``StreamEndpoints.learning(job_id)``.
            config: Stream configuration; defaults to ``StreamConfig.default()``.
            handlers: Message handlers; a ``CLIHandler`` is used when empty.
            interval_seconds: Delay between polling rounds.
            timeout_seconds: Maximum total time to keep polling, or ``None``
                to poll until a terminal state is seen.
            http_timeout: Timeout passed to the HTTP client created by the streamer.
            http_client: Optional pre-built client used instead of creating one.
            sleep_fn: Awaitable sleep function called between rounds.
        """
        self.base_url = base_url.rstrip("/")
        self.api_key = api_key
        self.job_id = job_id
        self.endpoints = endpoints or StreamEndpoints.learning(job_id)
        self.config = config or StreamConfig.default()
        self.handlers: list[StreamHandler] = list(handlers or [])
        self.interval_seconds = interval_seconds
        self.timeout_seconds = timeout_seconds
        self.http_timeout = http_timeout
        self._http = http_client
        self._sleep = sleep_fn

        # For every stream: primary path first, then fallbacks, empty entries dropped.
        self._status_paths = [
            p for p in (self.endpoints.status, *self.endpoints.status_fallbacks) if p
        ]
        self._event_paths = [
            p for p in (self.endpoints.events, *self.endpoints.event_fallbacks) if p
        ]
        self._metric_paths = [
            p for p in (self.endpoints.metrics, *self.endpoints.metric_fallbacks) if p
        ]
        self._timeline_paths = [
            p for p in (self.endpoints.timeline, *self.endpoints.timeline_fallbacks) if p
        ]

        self._last_seq_by_stream: dict[str, int] = {}
        self._last_step_by_metric: dict[str, int] = {}
        self._seen_messages: set[str] = set()
        self._last_status_payload: dict[str, Any] | None = None
        self._last_status_value: str | None = None
        self._terminal_seen = False
        self._terminal_event_status: str | None = None

        if not self.handlers:
            from .handlers import CLIHandler

            self.handlers = [CLIHandler()]

    async def stream_until_terminal(self) -> dict[str, Any]:
        """Stream configured endpoints until the job reaches a terminal state.

        Polling also stops once ``timeout_seconds`` (when not ``None``) has
        elapsed; in that case the most recent, possibly non-terminal, status
        is reported.

        Returns:
            The last status payload with its ``"status"`` key overwritten by
            the final status, or ``{"job_id": ..., "status": ...}`` when no
            status payload was ever received.
        """
        import time  # local import keeps this block self-contained

        deadline = (
            time.monotonic() + self.timeout_seconds
            if self.timeout_seconds is not None
            else None
        )

        http_cm = self._http or AsyncHttpClient(
            self.base_url, self.api_key, timeout=self.http_timeout
        )
        async with http_cm as http:
            while True:
                status = await self._refresh_status(http)

                event_messages = await self._poll_events(http)
                metric_messages = await self._poll_metrics(http)
                timeline_messages = await self._poll_timeline(http)

                self._dispatch(event_messages + metric_messages + timeline_messages)

                if self._terminal_seen or (status and status in TERMINAL_STATUSES):
                    break

                # Honour the caller-supplied overall timeout; at least one
                # polling round always runs before giving up.
                if deadline is not None and time.monotonic() >= deadline:
                    break

                await self._sleep(self.interval_seconds)

        for handler in self.handlers:
            # A failing flush must not mask the job result.
            with contextlib.suppress(Exception):
                handler.flush()

        # A terminal event/timeline verdict takes precedence over the polled status.
        final_status = self._terminal_event_status or self._last_status_value or "unknown"
        if self._last_status_payload:
            self._last_status_payload["status"] = final_status
            return self._last_status_payload
        return {"job_id": self.job_id, "status": final_status}

    async def _refresh_status(self, http: AsyncHttpClient) -> str:
        """Poll the status endpoint and return the lower-cased status string.

        Reads ``"status"`` (or ``"state"``) from the payload. Falls back to
        the last known status, or ``""``, when nothing new is available.
        """
        status_payload = await self._poll_status(http)
        if status_payload:
            self._last_status_payload = status_payload
            status = str(status_payload.get("status") or status_payload.get("state") or "").lower()
            if status:
                self._last_status_value = status
                if status in TERMINAL_STATUSES:
                    self._terminal_seen = True
                return status
        return self._last_status_value or ""

    async def _poll_status(self, http: AsyncHttpClient) -> dict[str, Any] | None:
        """Return the first dict payload from the status paths, dispatching it.

        Paths are tried in order; request errors and non-dict responses move
        on to the next path. Returns ``None`` when the status stream is
        disabled or no path yields a dict.
        """
        if StreamType.STATUS not in self.config.enabled_streams or not self._status_paths:
            return None

        for path in self._status_paths:
            try:
                data = await http.get(path)
            except Exception:
                # Best effort: try the next fallback path.
                continue
            if isinstance(data, dict):
                message = StreamMessage.from_status(self.job_id, data)
                self._dispatch([message])
                return data
        return None

    async def _poll_events(self, http: AsyncHttpClient) -> list[StreamMessage]:
        """Fetch new events from every event path and return them as messages.

        A per-path sequence cursor is kept so only events with a higher
        ``seq`` are emitted. Terminal event types set the terminal flags.
        Stops early once ``config.max_events_per_poll`` messages are collected.
        """
        if StreamType.EVENTS not in self.config.enabled_streams or not self._event_paths:
            return []
        messages: list[StreamMessage] = []
        total = 0
        for path in self._event_paths:
            since = self._last_seq_by_stream.get(path, 0)
            params = {"since_seq": since, "limit": 200}
            try:
                data = await http.get(path, params=params)
            except Exception:
                # Best effort: a failing path must not stop the other paths.
                continue
            raw_events = _extract_list(data, "events")
            for event in raw_events:
                seq = int(event.get("seq") or 0)
                if seq <= self._last_seq_by_stream.get(path, 0):
                    continue
                if not self.config.should_include_event(event):
                    continue
                self._last_seq_by_stream[path] = seq
                event_job_id = event.get("job_id") or self.job_id
                event_message = StreamMessage.from_event(event_job_id, event)
                event_type = str(event.get("type") or "").lower()
                if event_type in TERMINAL_EVENT_SUCCESS:
                    self._terminal_seen = True
                    self._terminal_event_status = "succeeded"
                elif event_type in TERMINAL_EVENT_FAILURE:
                    self._terminal_seen = True
                    self._terminal_event_status = "failed"
                messages.append(event_message)
                total += 1
                if self.config.max_events_per_poll and total >= self.config.max_events_per_poll:
                    return messages
        return messages

    async def _poll_metrics(self, http: AsyncHttpClient) -> list[StreamMessage]:
        """Fetch new metric points from every metric path and return them.

        A per-metric-name step cursor is kept so only points with a higher
        ``step`` are emitted. Points without a ``step`` are skipped.
        """
        if StreamType.METRICS not in self.config.enabled_streams or not self._metric_paths:
            return []
        messages: list[StreamMessage] = []
        for path in self._metric_paths:
            # NOTE(review): the request cursor is the highest step seen across
            # all metric names; if the endpoint only returns steps above
            # ``after_step``, a metric lagging behind another could miss
            # points. Confirm the endpoint semantics.
            after = max(self._last_step_by_metric.values()) if self._last_step_by_metric else -1
            params = {"after_step": after, "limit": 200}
            try:
                data = await http.get(path, params=params)
            except Exception:
                # Best effort: a failing path must not stop the other paths.
                continue
            points = _extract_list(data, "points")
            for point in points:
                name = point.get("name", "")
                raw_step = point.get("step")
                # Step 0 is a valid step; only a missing/empty step maps to
                # the -1 sentinel (``value or -1`` would also discard 0).
                step = -1 if raw_step is None or raw_step == "" else int(raw_step)
                if step <= self._last_step_by_metric.get(name, -1):
                    continue
                if not self.config.should_include_metric(point):
                    continue
                self._last_step_by_metric[name] = step
                metric_job_id = point.get("job_id") or self.job_id
                messages.append(StreamMessage.from_metric(metric_job_id, point))
        return messages

    async def _poll_timeline(self, http: AsyncHttpClient) -> list[StreamMessage]:
        """Fetch timeline entries from every timeline path and return them.

        An entry whose ``phase`` is a terminal status sets the terminal flags:
        failed/cancelled phases record ``"failed"``, the others ``"succeeded"``.
        """
        if StreamType.TIMELINE not in self.config.enabled_streams or not self._timeline_paths:
            return []
        messages: list[StreamMessage] = []
        for path in self._timeline_paths:
            try:
                data = await http.get(path)
            except Exception:
                # Best effort: a failing path must not stop the other paths.
                continue

            timeline_entries = _extract_list(data, "events")
            for entry in timeline_entries:
                if not self.config.should_include_timeline(entry):
                    continue
                timeline_job_id = entry.get("job_id") or self.job_id
                phase = str(entry.get("phase") or "").lower()
                if phase in TERMINAL_STATUSES:
                    self._terminal_seen = True
                    if phase in {"failed", "cancelled", "canceled"}:
                        self._terminal_event_status = "failed"
                    elif phase:
                        self._terminal_event_status = "succeeded"
                messages.append(StreamMessage.from_timeline(timeline_job_id, entry))
        return messages

    def _dispatch(self, messages: Iterable[StreamMessage]) -> None:
        """Send each message to every handler that accepts it.

        Applies key-based deduplication and random sampling as configured.
        A message dropped by sampling is not recorded as seen.
        """
        for message in messages:
            if self.config.deduplicate and message.key in self._seen_messages:
                continue
            if self.config.sample_rate < 1.0 and random.random() > self.config.sample_rate:
                continue
            if self.config.deduplicate:
                self._seen_messages.add(message.key)

            for handler in self.handlers:
                try:
                    if handler.should_handle(message):
                        handler.handle(message)
                except Exception:
                    # Deliberately best effort: one faulty handler must not
                    # interrupt streaming or starve the remaining handlers.
                    pass
292
+
293
+
294
+ def _extract_list(data: Any, field: str) -> list[dict[str, Any]]:
295
+ raw = (data or {}).get(field) if isinstance(data, dict) else None
296
+ if isinstance(raw, list):
297
+ return [item for item in raw if isinstance(item, dict)]
298
+ return []
299
+
300
+
301
+ __all__ = ["JobStreamer", "StreamEndpoints"]
@@ -0,0 +1,95 @@
1
+ from __future__ import annotations
2
+
3
+ from dataclasses import dataclass
4
+ from enum import Enum, auto
5
+ from typing import Any
6
+
7
+
8
class StreamType(Enum):
    """Kinds of streaming payloads a training job can emit."""

    STATUS = auto()
    EVENTS = auto()
    METRICS = auto()
    TIMELINE = auto()

    @property
    def endpoint_path(self) -> str:
        """Endpoint suffix appended when polling this stream.

        STATUS has an empty suffix; the other streams map to a fixed
        ``/<name>`` suffix.
        """
        suffix_by_name = {
            "STATUS": "",
            "EVENTS": "/events",
            "METRICS": "/metrics",
            "TIMELINE": "/timeline",
        }
        return suffix_by_name[self.name]
25
+
26
+
27
@dataclass(slots=True)
class StreamMessage:
    """Common envelope for status, event, metric and timeline payloads."""

    stream_type: StreamType
    timestamp: str
    job_id: str
    data: dict[str, Any]
    seq: int | None = None
    step: int | None = None
    phase: str | None = None

    @property
    def key(self) -> str:
        """Deduplication key derived from the stream type.

        Events use ``seq``, metrics use the ``"name"`` entry of ``data`` plus
        ``step``, timeline entries use ``phase`` plus ``timestamp``, and any
        other type is keyed by ``timestamp`` alone.
        """
        kind = self.stream_type
        if kind is StreamType.EVENTS:
            return f"event:{self.seq}"
        if kind is StreamType.METRICS:
            metric_name = self.data.get("name", "")
            return f"metric:{metric_name}:{self.step}"
        if kind is StreamType.TIMELINE:
            return f"timeline:{self.phase}:{self.timestamp}"
        return f"status:{self.timestamp}"

    @classmethod
    def from_status(cls, job_id: str, status_data: dict[str, Any]) -> StreamMessage:
        """Build a STATUS message, timestamped by ``updated_at`` or else ``created_at``."""
        stamp = status_data.get("updated_at", "") or status_data.get("created_at", "")
        return cls(StreamType.STATUS, stamp, job_id, status_data)

    @classmethod
    def from_event(cls, job_id: str, event_data: dict[str, Any]) -> StreamMessage:
        """Build an EVENTS message carrying the event's ``seq``."""
        stamp = event_data.get("created_at", "")
        sequence = event_data.get("seq")
        return cls(StreamType.EVENTS, stamp, job_id, event_data, seq=sequence)

    @classmethod
    def from_metric(cls, job_id: str, metric_data: dict[str, Any]) -> StreamMessage:
        """Build a METRICS message carrying the metric point's ``step``."""
        stamp = metric_data.get("created_at", "")
        step_index = metric_data.get("step")
        return cls(StreamType.METRICS, stamp, job_id, metric_data, step=step_index)

    @classmethod
    def from_timeline(cls, job_id: str, timeline_data: dict[str, Any]) -> StreamMessage:
        """Build a TIMELINE message carrying the entry's ``phase``."""
        stamp = timeline_data.get("created_at", "")
        phase_name = timeline_data.get("phase")
        return cls(StreamType.TIMELINE, stamp, job_id, timeline_data, phase=phase_name)
93
+
94
+
95
+ __all__ = ["StreamMessage", "StreamType"]
@@ -50,10 +50,12 @@ class TaskAppRegistry:
50
50
 
51
51
  def register(self, entry: TaskAppEntry) -> None:
52
52
  if entry.app_id in self._entries:
53
- raise ValueError(f"Task app already registered: {entry.app_id}")
53
+ # Allow idempotent registration when modules are imported multiple times.
54
+ return
54
55
  self._entries[entry.app_id] = entry
55
56
  for alias in entry.aliases:
56
- if alias in self._alias_to_id:
57
+ existing = self._alias_to_id.get(alias)
58
+ if existing and existing != entry.app_id:
57
59
  raise ValueError(f"Alias already registered: {alias}")
58
60
  self._alias_to_id[alias] = entry.app_id
59
61
 
synth_ai/task/config.py CHANGED
@@ -185,9 +185,12 @@ class FilterConfig:
185
185
  raise ValueError(f"output must be a .jsonl or .json file, got: {self.output}")
186
186
 
187
187
  # Validate score thresholds
188
- if self.min_official_score is not None and self.max_official_score is not None:
189
- if self.min_official_score > self.max_official_score:
190
- raise ValueError("min_official_score cannot be greater than max_official_score")
188
+ if (
189
+ self.min_official_score is not None
190
+ and self.max_official_score is not None
191
+ and self.min_official_score > self.max_official_score
192
+ ):
193
+ raise ValueError("min_official_score cannot be greater than max_official_score")
191
194
 
192
195
  # Validate limit/offset
193
196
  if self.limit is not None and self.limit < 1:
@@ -254,4 +257,3 @@ class FilterConfig:
254
257
  output_path.parent.mkdir(parents=True, exist_ok=True)
255
258
  return output_path
256
259
 
257
-
@@ -9,10 +9,9 @@ This module provides:
9
9
  """
10
10
 
11
11
  # Core models (flexible validation)
12
- from .models import Criterion, Rubric
13
-
14
12
  # Loading and blending
15
13
  from .loaders import blend_rubrics, load_rubric
14
+ from .models import Criterion, Rubric
16
15
 
17
16
  # Scoring
18
17
  from .scoring import score_events_against_rubric, score_outcome_against_rubric
@@ -78,15 +78,20 @@ def load_rubric(source: str | dict[str, Any] | Rubric | None) -> Rubric | None:
78
78
  data = _parse_structured(text, suffix)
79
79
 
80
80
  # Check if this looks like a backend judge rubric (wrong format)
81
- if isinstance(data, dict) and "event" in data and "outcome" in data:
82
- # Missing required task app rubric fields
83
- if "version" not in data and "goal_text" not in data and "criteria" not in data:
84
- source_hint = f" ({source})" if isinstance(source, str) else ""
85
- raise ValueError(
86
- f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
87
- f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
88
- f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
89
- )
81
+ if (
82
+ isinstance(data, dict)
83
+ and "event" in data
84
+ and "outcome" in data
85
+ and "version" not in data
86
+ and "goal_text" not in data
87
+ and "criteria" not in data
88
+ ):
89
+ source_hint = f" ({source})" if isinstance(source, str) else ""
90
+ raise ValueError(
91
+ f"Rubric appears to be in backend judge format (has 'event'/'outcome' keys){source_hint}. "
92
+ f"Task apps require rubrics with 'version', 'goal_text', and 'criteria' fields. "
93
+ f"Backend judge rubrics should be named '*_backend_judge.json' and loaded by judge functions."
94
+ )
90
95
 
91
96
  return Rubric.model_validate(data)
92
97
 
@@ -149,4 +154,3 @@ def blend_rubrics(base: Rubric | None, override: Rubric | None) -> Rubric | None
149
154
  criteria=merged,
150
155
  aggregation=aggregation,
151
156
  )
152
-