synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (354) hide show
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick test script to demonstrate image validation.
4
+
5
+ Run from synth-ai root:
6
+ uv run python examples/qwen_vl/test_image_validation.py
7
+ """
8
+
9
+ from synth_ai.learning.sft.data import coerce_example, validate_vision_example
10
+
11
# Test cases
#
# Every case is a two-turn conversation: a user message whose content is a
# list of parts (text and/or image_url) followed by a plain-text assistant
# reply. `should_pass` records whether validation is expected to accept it.


def _text_part(text):
    """Return a text content part."""
    return {"type": "text", "text": text}


def _image_part(image_url):
    """Return an image_url content part wrapping the given inner payload."""
    return {"type": "image_url", "image_url": image_url}


def _case(name, user_content, reply, should_pass):
    """Assemble one test-case record from its varying pieces."""
    return {
        "name": name,
        "data": {
            "messages": [
                {"role": "user", "content": user_content},
                {"role": "assistant", "content": reply},
            ]
        },
        "should_pass": should_pass,
    }


test_cases = [
    _case(
        "Valid - HTTP URL",
        [
            _text_part("Describe this"),
            _image_part({"url": "https://example.com/image.jpg"}),
        ],
        "A beautiful image",
        True,
    ),
    _case(
        "Valid - Base64",
        [_image_part({"url": "data:image/png;base64,iVBORw0KGgo..."})],
        "An image",
        True,
    ),
    _case(
        "Invalid - Empty URL",
        [
            _text_part("What's this?"),
            _image_part({"url": ""}),  # Empty!
        ],
        "Response",
        False,
    ),
    _case(
        "Invalid - Missing URL field",
        [_image_part({})],  # No url field!
        "Response",
        False,
    ),
    _case(
        "Invalid - Null URL",
        [_image_part({"url": None})],  # Null!
        "Response",
        False,
    ),
    _case(
        "Invalid - Whitespace URL",
        [_image_part({"url": " "})],  # Whitespace!
        "Response",
        False,
    ),
    _case(
        "Invalid - Mixed valid and invalid",
        [
            _image_part({"url": "https://example.com/valid.jpg"}),
            _image_part({"url": ""}),  # One invalid!
        ],
        "Response",
        False,
    ),
    _case(
        "Invalid - Non-string URL",
        [_image_part({"url": 12345})],  # Integer!
        "Response",
        False,
    ),
]
137
+
138
+
139
def main():
    """Run every entry of ``test_cases`` through the vision validator and report.

    Each case is coerced with ``coerce_example`` and checked with
    ``validate_vision_example(..., require_images=True)``. A case counts as
    passed when the outcome matches its ``should_pass`` flag; an exception is
    treated as a rejection, so it passes a negative case and fails a positive one.

    Returns:
        0 when every case passed, 1 otherwise.
    """
    rule = "=" * 80
    print(rule)
    print("IMAGE VALIDATION TEST")
    print(rule)
    print()

    passed = 0
    failed = 0

    for case in test_cases:
        label = case["name"]
        payload = case["data"]
        expect_ok = case["should_pass"]

        # Work out the verdict and its explanation first; reporting and
        # counting happen once, below.
        try:
            parsed = coerce_example(payload)
            accepted, problem = validate_vision_example(parsed, require_images=True)
            if expect_ok:
                ok = bool(accepted)
                detail = (
                    "Correctly accepted valid example"
                    if ok
                    else f"Should pass but got error: {problem}"
                )
            else:
                ok = not accepted
                detail = (
                    f"Correctly rejected: {problem}"
                    if ok
                    else "Should fail but passed validation"
                )
        except Exception as exc:
            # An exception is an acceptable way to reject a bad example.
            ok = not expect_ok
            detail = (
                f"Correctly raised exception: {exc}"
                if ok
                else f"Unexpected exception: {exc}"
            )

        verdict = "✅ PASS" if ok else "❌ FAIL"
        print(f"{verdict}: {label}")
        print(f" → {detail}")
        if ok:
            passed += 1
        else:
            failed += 1

        print()

    total = len(test_cases)
    print(rule)
    print(f"RESULTS: {passed}/{total} passed, {failed}/{total} failed")
    print(rule)

    if failed:
        print(f"⚠️ {failed} test(s) failed")
        return 1

    print("🎉 All tests passed!")
    return 0
197
+
198
+
199
if __name__ == "__main__":
    # Propagate main()'s 0/1 result as the process exit status. SystemExit is
    # raised directly because the bare `exit` name is a helper injected by the
    # `site` module for interactive sessions; it is not defined when the
    # interpreter runs with -S or in some frozen/embedded builds.
    raise SystemExit(main())
201
+
@@ -0,0 +1,110 @@
1
+ """Generate test vision SFT dataset for Qwen3-VL-2B."""
2
+
3
+ import base64
4
+ import json
5
+ from pathlib import Path
6
+ from io import BytesIO
7
+
8
# Pillow is required to render the test images; stop with exit status 1 and a
# short message if it is missing.
try:
    from PIL import Image
except ImportError:
    print("❌ PIL not available")
    # Raise SystemExit rather than calling the `site`-provided `exit()` helper,
    # which is intended for interactive use and may not be defined.
    raise SystemExit(1) from None
13
+
14
+ BASE_DIR = Path(__file__).resolve().parent
15
+
16
def create_test_image(color: str) -> str:
    """Render a solid 64x64 RGB square as PNG and return it as a base64 data URL.

    Args:
        color: One of "red", "blue", "green", "yellow" or "purple".

    Returns:
        A ``data:image/png;base64,...`` string holding the encoded PNG.

    Raises:
        KeyError: If ``color`` is not one of the supported names.
    """
    rgb_by_name = {
        "red": (255, 0, 0),
        "blue": (0, 0, 255),
        "green": (0, 255, 0),
        "yellow": (255, 255, 0),
        "purple": (128, 0, 128),
    }

    fill = rgb_by_name[color]
    png_bytes = BytesIO()
    Image.new("RGB", (64, 64), color=fill).save(png_bytes, format="PNG")
    encoded = base64.b64encode(png_bytes.getvalue()).decode("utf-8")
    return "data:image/png;base64," + encoded
31
+
32
+
33
def main():
    """Write a small vision SFT JSONL fixture and validate it when the SDK imports.

    For each of five colors, two examples are produced (a one-word color
    question and a short description request), each embedding a generated
    image as a data URL. The rows are written to
    ``BASE_DIR / "test_data" / "vision_sft_test.jsonl"``. If ``synth_ai`` can be
    imported, the file is reloaded and each row is checked with
    ``validate_vision_example``; otherwise validation is skipped with a note.
    """
    target_dir = BASE_DIR / "test_data"
    target_dir.mkdir(parents=True, exist_ok=True)
    jsonl_path = target_dir / "vision_sft_test.jsonl"

    def build_row(prompt, answer, row_id, row_type, shade):
        # One user turn (text + image) followed by the assistant's answer.
        return {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": prompt},
                        {"type": "image_url", "image_url": {"url": create_test_image(shade)}},
                    ],
                },
                {
                    "role": "assistant",
                    "content": answer,
                },
            ],
            "metadata": {"example_id": row_id, "type": row_type},
        }

    # Two rows per color, in order: color identification, then description.
    rows = []
    for idx, shade in enumerate(("red", "blue", "green", "yellow", "purple")):
        rows.append(
            build_row(
                "What color is this image? Answer in one word.",
                shade.capitalize(),
                f"color_{idx}",
                "color_id",
                shade,
            )
        )
        rows.append(
            build_row(
                "Describe this image briefly.",
                f"This is a {shade} colored square image.",
                f"describe_{idx}",
                "description",
                shade,
            )
        )

    # One JSON object per line.
    with jsonl_path.open("w", encoding="utf-8") as sink:
        sink.writelines(json.dumps(row) + "\n" for row in rows)

    size_kb = jsonl_path.stat().st_size / 1024
    print(f"✅ Created {len(rows)} vision SFT examples")
    print(f" Output: {jsonl_path}")
    print(f" Size: {size_kb:.1f} KB")

    # Optional round-trip through the SDK loader and validator.
    try:
        from synth_ai.learning.sft.data import load_jsonl, validate_vision_example

        loaded = load_jsonl(jsonl_path, min_messages=1)
        print(f" Loaded: {len(loaded)} examples")

        valid_total = 0
        for row in loaded:
            ok, why = validate_vision_example(row, require_images=True)
            if not ok:
                print(f" ⚠️ Invalid example: {why}")
                continue
            valid_total += 1

        print(f" Valid: {valid_total}/{len(loaded)}")
    except ImportError:
        print(" (SDK validation skipped - synth_ai not available)")
107
+
108
+
109
+ if __name__ == "__main__":
110
+ main()
examples/rl/README.md CHANGED
@@ -5,8 +5,8 @@ This example trains a reinforcement learning policy on single-step math problems
5
5
  ## Quick Commands
6
6
 
7
7
  ```bash
8
- # Serve locally with tracing
9
- uvx synth-ai serve math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
8
+ # Serve locally with tracing (uvicorn runtime)
9
+ uvx synth-ai deploy --runtime uvicorn math-single-step --port 8101 --env-file examples/rl/.env --trace traces/math
10
10
 
11
11
  # Modal deployment
12
12
  uvx synth-ai deploy --name synth-math-single-step --env-file examples/rl/.env
@@ -45,14 +45,14 @@ The task app is defined in `synth_ai/task/apps/math_single_step.py` and register
45
45
  - `-0.5` if the tool call omits an answer or uses the wrong tool
46
46
  - `-1.0` when no tool call is provided
47
47
 
48
- Serve locally with tracing to capture trajectories:
48
+ Run locally (uvicorn runtime) with tracing to capture trajectories:
49
49
 
50
50
  ```bash
51
- uvx synth-ai serve math-single-step \
51
+ uvx synth-ai deploy --runtime uvicorn math-single-step \
52
52
  --port 8101 \
53
53
  --env-file examples/rl/.env \
54
54
  --trace traces/math \
55
- --trace-db traces/math/synth_ai.db
55
+ --trace-db traces/math/task_app_traces_<timestamp>.db
56
56
  ```
57
57
 
58
58
  Deploy or serve on Modal using the same env file; the registration includes a `ModalDeploymentConfig` that installs the `datasets` package automatically.
@@ -162,7 +162,7 @@ For broader background on Synth task apps, CLI commands, and tracing, see the ne
162
162
 
163
163
 
164
164
  uv run python examples/rl/run_eval.py --toml examples/rl/configs/eval_base_qwen.toml
165
- uvx synth-ai serve math-single-step \
165
+ uvx synth-ai deploy --runtime uvicorn math-single-step \
166
166
  --port 8101 \
167
167
  --env-file examples/rl/.env \
168
168
  --trace traces/math \
@@ -0,0 +1,17 @@
1
+ type = "rl"
2
+
3
+ provider = "synth"
4
+ task_app_url = "http://localhost:8101"
5
+ model = "Qwen/Qwen3-1.7B"
6
+ split = "validation"
7
+ num_episodes = 50
8
+ seed_start = 0
9
+
10
+ [policy]
11
+ inference_url = "https://agent-learning.onrender.com/api/inference"
12
+ max_tokens = 128
13
+ temperature = 0.0
14
+
15
+ # Optionally supply custom headers
16
+ # [policy.headers]
17
+ # Authorization = "Bearer ..."
@@ -0,0 +1,13 @@
1
+ type = "rl"
2
+
3
+ provider = "synth"
4
+ task_app_url = "https://your-math-task.modal.run"
5
+ model = "rl:REPLACE_WITH_JOB_ID"
6
+ split = "test"
7
+ num_episodes = 200
8
+ seed_start = 100000
9
+
10
+ [policy]
11
+ inference_url = "https://your-inference-host"
12
+ max_tokens = 128
13
+ temperature = 0.0
@@ -0,0 +1,62 @@
1
+ [algorithm]
2
+ type = "online"
3
+ method = "policy_gradient"
4
+ variety = "gspo"
5
+
6
+ [services]
7
+ task_url = "https://your-math-task.modal.run"
8
+
9
+ [model]
10
+ base = "Qwen/Qwen3-4B"
11
+ trainer_mode = "full"
12
+ label = "math-single-step-qwen3-4b"
13
+
14
+ [policy]
15
+ model = "Qwen/Qwen3-4B"
16
+ inference_url = "https://your-inference-host"
17
+ max_tokens = 128
18
+ temperature = 0.0
19
+
20
+ [data]
21
+ split = "train"
22
+ seed_start = 0
23
+ episodes_per_iteration = 2048
24
+ evaluation_split = "validation"
25
+ evaluation_episodes = 256
26
+
27
+ [training]
28
+ num_epochs = 1
29
+ iterations_per_epoch = 20
30
+ max_turns = 1
31
+ ops = ["agent", "env"]
32
+ batch_size = 128
33
+ group_size = 1024
34
+ reward_positive = 1.0
35
+ reward_negative_no_tool = -1.0
36
+ reward_negative_no_answer = -0.5
37
+ learning_rate = 5e-6
38
+
39
+ [compute]
40
+ gpu_type = "A10G"
41
+ gpu_count = 4
42
+
43
+ [topology]
44
+ type = "single_node_split"
45
+ gpus_for_vllm = 2
46
+ gpus_for_training = 2
47
+ gpus_for_ref = 0
48
+ tensor_parallel = 1
49
+
50
+ [rollout]
51
+ env_name = "math"
52
+ policy_name = "math-single-step"
53
+ max_turns = 1
54
+ episodes_per_batch = 256
55
+
56
+ [evaluation]
57
+ instances = 256
58
+ every_n_iters = 10
59
+ seeds = [0, 1, 2, 3, 4]
60
+
61
+ [tags]
62
+ experiment = "math_single_step"
@@ -0,0 +1,79 @@
1
+ [algorithm]
2
+ type = "online"
3
+ method = "policy_gradient"
4
+ variety = "gspo"
5
+
6
+ [services]
7
+ task_url = "http://localhost:8101"
8
+
9
+ [model]
10
+ base = "Qwen/Qwen3-1.7B"
11
+ trainer_mode = "full"
12
+ label = "math-single-step-qwen3-1.7b"
13
+
14
+ [policy]
15
+ model = "Qwen/Qwen3-1.7B"
16
+ inference_url = "https://agent-learning.onrender.com/api/inference"
17
+ max_tokens = 1028
18
+ temperature = 0.2
19
+
20
+ [data]
21
+ split = "train"
22
+ seed_start = 0
23
+ episodes_per_iteration = 1280 # 32 episodes per batch (group_size 16 * batch_size 2) * 2 batches per step * 20 steps
24
+ evaluation_split = "validation"
25
+ evaluation_episodes = 50
26
+
27
+ [training]
28
+ num_epochs = 1
29
+ iterations_per_epoch = 20
30
+ max_turns = 1
31
+ ops = ["agent", "env"]
32
+ batch_size = 2
33
+ group_size = 16
34
+ reward_positive = 1.0
35
+ reward_negative_no_tool = -1.0
36
+ reward_negative_no_answer = -0.5
37
+ learning_rate = 5e-6
38
+ log_interval = 1
39
+ weight_sync_interval = 1
40
+
41
+ [training.weight_sync]
42
+ enable = true
43
+ targets = ["policy"]
44
+
45
+ [compute]
46
+ gpu_type = "H100"
47
+ gpu_count = 4
48
+
49
+ [topology]
50
+ type = "single_node_split"
51
+ gpus_for_vllm = 2
52
+ gpus_for_training = 1
53
+ gpus_for_ref = 1
54
+ tensor_parallel = 1
55
+
56
+ [vllm]
57
+ tensor_parallel_size = 1
58
+ max_model_len = 4096
59
+
60
+ [reference]
61
+ placement = "dedicated"
62
+ port = 8002
63
+ tp = 1
64
+ health_max_wait_s = 180
65
+ health_interval_ms = 300
66
+
67
+ [rollout]
68
+ env_name = "math"
69
+ policy_name = "math-single-step"
70
+ max_turns = 1
71
+ episodes_per_batch = 32 # group_size * batch_size
72
+
73
+ [evaluation]
74
+ instances = 32
75
+ every_n_iters = 10
76
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
77
+
78
+ [tags]
79
+ experiment = "math_single_step_qwen17"
@@ -0,0 +1,37 @@
1
+ type = "rl"
2
+
3
+ [services]
4
+ task_url = "https://your-math-task.modal.run"
5
+
6
+ [model]
7
+ source = "ft:REPLACE_WITH_MODEL_ID"
8
+
9
+ [policy]
10
+ model = "ft:REPLACE_WITH_MODEL_ID"
11
+ inference_url = "https://your-inference-host"
12
+ max_tokens = 128
13
+ temperature = 0.0
14
+
15
+ [data]
16
+ split = "train"
17
+ seed_start = 0
18
+ episodes_per_iteration = 2048
19
+ evaluation_split = "validation"
20
+ evaluation_episodes = 256
21
+
22
+ [training]
23
+ max_turns = 1
24
+ ops = ["agent", "env"]
25
+ batch_size = 128
26
+ group_size = 1024
27
+ reward_positive = 1.0
28
+ reward_negative_no_tool = -1.0
29
+ reward_negative_no_answer = -0.5
30
+ learning_rate = 5e-6
31
+
32
+ [compute]
33
+ gpu_type = "A10G"
34
+ gpu_count = 4
35
+
36
+ [tags]
37
+ experiment = "math_single_step_from_fft"