synth-ai 0.2.14__py3-none-any.whl → 0.2.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
Files changed (354)
  1. examples/README.md +1 -0
  2. examples/analyze_semantic_words.sh +2 -2
  3. examples/blog_posts/pokemon_vl/README.md +98 -0
  4. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +25 -0
  5. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  6. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  7. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +42 -0
  8. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  9. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  10. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  11. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  12. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  13. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  14. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +41 -0
  15. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  16. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  17. examples/multi_step/SFT_README.md +147 -0
  18. examples/multi_step/configs/crafter_rl_outcome.toml +1 -1
  19. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +73 -115
  20. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +1 -1
  21. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +1 -1
  22. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  23. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  24. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  25. examples/multi_step/convert_traces_to_sft.py +84 -0
  26. examples/multi_step/run_sft_qwen30b.sh +45 -0
  27. examples/qwen_coder/configs/coder_lora_30b.toml +1 -2
  28. examples/qwen_coder/configs/coder_lora_4b.toml +5 -1
  29. examples/qwen_coder/configs/coder_lora_small.toml +1 -2
  30. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  31. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  32. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  33. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  34. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  35. examples/qwen_vl/QUICKSTART.md +327 -0
  36. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  37. examples/qwen_vl/README.md +152 -0
  38. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  39. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  40. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  41. examples/qwen_vl/SETUP_COMPLETE.md +274 -0
  42. examples/qwen_vl/VISION_TESTS_COMPLETE.md +489 -0
  43. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  44. examples/qwen_vl/__init__.py +2 -0
  45. examples/qwen_vl/collect_data_via_cli.md +415 -0
  46. examples/qwen_vl/collect_vision_traces.py +368 -0
  47. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +110 -0
  48. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +59 -0
  49. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +26 -0
  50. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  51. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +26 -0
  52. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  53. examples/qwen_vl/configs/filter_qwen3vl_sft.toml +49 -0
  54. examples/qwen_vl/configs/filter_vision_sft.toml +52 -0
  55. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  56. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  57. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  58. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  59. examples/qwen_vl/run_vision_comparison.sh +61 -0
  60. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  61. examples/qwen_vl/test_image_validation.py +201 -0
  62. examples/qwen_vl/test_sft_vision_data.py +110 -0
  63. examples/rl/README.md +6 -6
  64. examples/rl/configs/eval_base_qwen.toml +17 -0
  65. examples/rl/configs/eval_rl_qwen.toml +13 -0
  66. examples/rl/configs/rl_from_base_qwen.toml +62 -0
  67. examples/rl/configs/rl_from_base_qwen17.toml +79 -0
  68. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  69. examples/rl/run_eval.py +436 -0
  70. examples/rl/run_rl_and_save.py +111 -0
  71. examples/rl/task_app/README.md +21 -0
  72. examples/rl/task_app/math_single_step.py +990 -0
  73. examples/rl/task_app/math_task_app.py +111 -0
  74. examples/run_crafter_demo.sh +2 -2
  75. examples/sft/README.md +6 -6
  76. examples/sft/configs/crafter_fft_qwen0p6b.toml +7 -2
  77. examples/sft/configs/crafter_lora_qwen0p6b.toml +7 -3
  78. examples/sft/evaluate.py +2 -4
  79. examples/sft/export_dataset.py +7 -4
  80. examples/swe/task_app/README.md +33 -3
  81. examples/swe/task_app/grpo_swe_mini.py +4 -1
  82. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  83. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  84. examples/swe/task_app/hosted/envs/mini_swe/environment.py +50 -23
  85. examples/swe/task_app/hosted/inference/openai_client.py +4 -4
  86. examples/swe/task_app/hosted/policy_routes.py +0 -2
  87. examples/swe/task_app/hosted/rollout.py +0 -8
  88. examples/swe/task_app/morph_backend.py +178 -0
  89. examples/task_apps/crafter/task_app/README.md +1 -1
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +70 -10
  91. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +63 -27
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +48 -50
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +75 -36
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +31 -15
  97. examples/task_apps/enron/__init__.py +1 -0
  98. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  99. examples/task_apps/math/README.md +1 -2
  100. examples/task_apps/pokemon_red/README.md +3 -4
  101. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  102. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  103. examples/task_apps/pokemon_red/task_app.py +36 -5
  104. examples/task_apps/sokoban/README.md +2 -3
  105. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  106. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  107. examples/vlm/README.md +3 -3
  108. examples/vlm/configs/crafter_vlm_gpt4o.toml +5 -0
  109. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  110. examples/vlm/filter_image_rows.py +1 -1
  111. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  112. examples/warming_up_to_rl/_utils.py +92 -0
  113. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  114. examples/warming_up_to_rl/configs/crafter_fft.toml +5 -0
  115. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  116. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  117. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  118. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  119. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  120. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  121. examples/warming_up_to_rl/readme.md +63 -132
  122. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  123. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  124. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  125. examples/warming_up_to_rl/task_app/README.md +42 -0
  126. examples/warming_up_to_rl/task_app/grpo_crafter.py +827 -0
  127. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  128. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  129. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  130. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  131. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  132. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  133. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  134. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  135. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  136. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +454 -0
  137. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  138. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  139. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1084 -0
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  147. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  148. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  152. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  153. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +5 -0
  154. synth_ai/__init__.py +44 -30
  155. synth_ai/_utils/__init__.py +47 -0
  156. synth_ai/_utils/base_url.py +10 -0
  157. synth_ai/_utils/http.py +10 -0
  158. synth_ai/_utils/prompts.py +10 -0
  159. synth_ai/_utils/task_app_state.py +12 -0
  160. synth_ai/_utils/user_config.py +10 -0
  161. synth_ai/api/models/supported.py +144 -7
  162. synth_ai/api/train/__init__.py +13 -1
  163. synth_ai/api/train/builders.py +9 -3
  164. synth_ai/api/train/cli.py +155 -17
  165. synth_ai/api/train/config_finder.py +18 -11
  166. synth_ai/api/train/configs/__init__.py +8 -1
  167. synth_ai/api/train/configs/rl.py +32 -7
  168. synth_ai/api/train/configs/sft.py +6 -2
  169. synth_ai/api/train/configs/shared.py +59 -2
  170. synth_ai/api/train/env_resolver.py +13 -10
  171. synth_ai/auth/credentials.py +119 -0
  172. synth_ai/cli/__init__.py +61 -69
  173. synth_ai/cli/_modal_wrapper.py +7 -5
  174. synth_ai/cli/_typer_patch.py +0 -2
  175. synth_ai/cli/_validate_task_app.py +22 -4
  176. synth_ai/cli/commands/__init__.py +17 -0
  177. synth_ai/cli/commands/demo/__init__.py +6 -0
  178. synth_ai/cli/commands/demo/core.py +163 -0
  179. synth_ai/cli/commands/deploy/__init__.py +23 -0
  180. synth_ai/cli/commands/deploy/core.py +614 -0
  181. synth_ai/cli/commands/deploy/errors.py +72 -0
  182. synth_ai/cli/commands/deploy/validation.py +11 -0
  183. synth_ai/cli/commands/eval/__init__.py +19 -0
  184. synth_ai/cli/commands/eval/core.py +1109 -0
  185. synth_ai/cli/commands/eval/errors.py +81 -0
  186. synth_ai/cli/commands/eval/validation.py +133 -0
  187. synth_ai/cli/commands/filter/__init__.py +12 -0
  188. synth_ai/cli/commands/filter/core.py +388 -0
  189. synth_ai/cli/commands/filter/errors.py +55 -0
  190. synth_ai/cli/commands/filter/validation.py +77 -0
  191. synth_ai/cli/commands/help/__init__.py +177 -0
  192. synth_ai/cli/commands/help/core.py +73 -0
  193. synth_ai/cli/commands/status/__init__.py +64 -0
  194. synth_ai/cli/commands/status/client.py +192 -0
  195. synth_ai/cli/commands/status/config.py +92 -0
  196. synth_ai/cli/commands/status/errors.py +20 -0
  197. synth_ai/cli/commands/status/formatters.py +164 -0
  198. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  199. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  200. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  201. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  202. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  203. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  204. synth_ai/cli/commands/status/utils.py +114 -0
  205. synth_ai/cli/commands/train/__init__.py +53 -0
  206. synth_ai/cli/commands/train/core.py +21 -0
  207. synth_ai/cli/commands/train/errors.py +117 -0
  208. synth_ai/cli/commands/train/judge_schemas.py +199 -0
  209. synth_ai/cli/commands/train/judge_validation.py +304 -0
  210. synth_ai/cli/commands/train/validation.py +443 -0
  211. synth_ai/cli/demo.py +2 -162
  212. synth_ai/cli/deploy/__init__.py +28 -0
  213. synth_ai/cli/deploy/core.py +5 -0
  214. synth_ai/cli/deploy/errors.py +23 -0
  215. synth_ai/cli/deploy/validation.py +5 -0
  216. synth_ai/cli/eval/__init__.py +36 -0
  217. synth_ai/cli/eval/core.py +5 -0
  218. synth_ai/cli/eval/errors.py +31 -0
  219. synth_ai/cli/eval/validation.py +5 -0
  220. synth_ai/cli/filter/__init__.py +28 -0
  221. synth_ai/cli/filter/core.py +5 -0
  222. synth_ai/cli/filter/errors.py +23 -0
  223. synth_ai/cli/filter/validation.py +5 -0
  224. synth_ai/cli/legacy_root_backup.py +3 -1
  225. synth_ai/cli/lib/__init__.py +10 -0
  226. synth_ai/cli/lib/task_app_discovery.py +7 -0
  227. synth_ai/cli/lib/task_app_env.py +518 -0
  228. synth_ai/cli/modal_serve/__init__.py +12 -0
  229. synth_ai/cli/modal_serve/core.py +14 -0
  230. synth_ai/cli/modal_serve/errors.py +8 -0
  231. synth_ai/cli/modal_serve/validation.py +11 -0
  232. synth_ai/cli/recent.py +2 -1
  233. synth_ai/cli/serve/__init__.py +12 -0
  234. synth_ai/cli/serve/core.py +14 -0
  235. synth_ai/cli/serve/errors.py +8 -0
  236. synth_ai/cli/serve/validation.py +11 -0
  237. synth_ai/cli/setup.py +21 -0
  238. synth_ai/cli/status.py +7 -126
  239. synth_ai/cli/task_app_deploy.py +7 -0
  240. synth_ai/cli/task_app_list.py +25 -0
  241. synth_ai/cli/task_app_modal_serve.py +11 -0
  242. synth_ai/cli/task_app_serve.py +11 -0
  243. synth_ai/cli/task_apps.py +110 -1499
  244. synth_ai/cli/traces.py +1 -1
  245. synth_ai/cli/train/__init__.py +12 -0
  246. synth_ai/cli/train/core.py +21 -0
  247. synth_ai/cli/train/errors.py +8 -0
  248. synth_ai/cli/train/validation.py +24 -0
  249. synth_ai/cli/train.py +5 -0
  250. synth_ai/cli/turso.py +1 -1
  251. synth_ai/cli/watch.py +1 -1
  252. synth_ai/demos/__init__.py +10 -0
  253. synth_ai/demos/core/__init__.py +28 -1
  254. synth_ai/demos/crafter/__init__.py +1 -0
  255. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  256. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  257. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  258. synth_ai/demos/demo_registry.py +176 -0
  259. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  260. synth_ai/demos/math/__init__.py +1 -0
  261. synth_ai/demos/math/_common.py +16 -0
  262. synth_ai/demos/math/app.py +38 -0
  263. synth_ai/demos/math/config.toml +76 -0
  264. synth_ai/demos/math/deploy_modal.py +54 -0
  265. synth_ai/demos/math/modal_task_app.py +702 -0
  266. synth_ai/demos/math/task_app_entry.py +51 -0
  267. synth_ai/environments/environment/core.py +7 -1
  268. synth_ai/environments/examples/bandit/engine.py +0 -1
  269. synth_ai/environments/examples/bandit/environment.py +0 -1
  270. synth_ai/environments/examples/red/engine.py +33 -12
  271. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  272. synth_ai/environments/examples/red/environment.py +26 -0
  273. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  274. synth_ai/environments/examples/wordle/environment.py +0 -1
  275. synth_ai/evals/base.py +16 -5
  276. synth_ai/evals/client.py +1 -1
  277. synth_ai/http.py +8 -22
  278. synth_ai/inference/client.py +1 -1
  279. synth_ai/judge_schemas.py +4 -5
  280. synth_ai/learning/client.py +1 -1
  281. synth_ai/learning/health.py +1 -1
  282. synth_ai/learning/jobs.py +1 -1
  283. synth_ai/learning/rl/client.py +4 -2
  284. synth_ai/learning/rl/env_keys.py +1 -1
  285. synth_ai/learning/rl/secrets.py +1 -1
  286. synth_ai/learning/sft/client.py +1 -1
  287. synth_ai/learning/sft/data.py +407 -4
  288. synth_ai/learning/validators.py +4 -1
  289. synth_ai/streaming/__init__.py +29 -0
  290. synth_ai/streaming/config.py +94 -0
  291. synth_ai/streaming/handlers.py +469 -0
  292. synth_ai/streaming/streamer.py +301 -0
  293. synth_ai/streaming/types.py +95 -0
  294. synth_ai/task/apps/__init__.py +4 -2
  295. synth_ai/task/config.py +6 -4
  296. synth_ai/task/rubrics/__init__.py +1 -2
  297. synth_ai/task/rubrics/loaders.py +14 -10
  298. synth_ai/task/rubrics.py +219 -0
  299. synth_ai/task/trace_correlation_helpers.py +24 -11
  300. synth_ai/task/tracing_utils.py +14 -3
  301. synth_ai/task/validators.py +0 -1
  302. synth_ai/tracing_v3/abstractions.py +3 -3
  303. synth_ai/tracing_v3/config.py +15 -13
  304. synth_ai/tracing_v3/constants.py +21 -0
  305. synth_ai/tracing_v3/db_config.py +3 -1
  306. synth_ai/tracing_v3/decorators.py +10 -7
  307. synth_ai/tracing_v3/llm_call_record_helpers.py +5 -5
  308. synth_ai/tracing_v3/migration_helper.py +1 -2
  309. synth_ai/tracing_v3/session_tracer.py +7 -7
  310. synth_ai/tracing_v3/storage/base.py +29 -29
  311. synth_ai/tracing_v3/storage/config.py +3 -3
  312. synth_ai/tracing_v3/turso/daemon.py +8 -9
  313. synth_ai/tracing_v3/turso/native_manager.py +80 -72
  314. synth_ai/tracing_v3/utils.py +2 -2
  315. synth_ai/utils/__init__.py +101 -0
  316. synth_ai/utils/base_url.py +94 -0
  317. synth_ai/utils/cli.py +131 -0
  318. synth_ai/utils/env.py +294 -0
  319. synth_ai/utils/http.py +172 -0
  320. synth_ai/utils/modal.py +308 -0
  321. synth_ai/utils/process.py +212 -0
  322. synth_ai/utils/prompts.py +39 -0
  323. synth_ai/utils/sqld.py +122 -0
  324. synth_ai/utils/task_app_discovery.py +882 -0
  325. synth_ai/utils/task_app_env.py +186 -0
  326. synth_ai/utils/task_app_state.py +318 -0
  327. synth_ai/utils/user_config.py +137 -0
  328. synth_ai/v0/config/__init__.py +1 -5
  329. synth_ai/v0/config/base_url.py +1 -7
  330. synth_ai/v0/tracing/config.py +1 -1
  331. synth_ai/v0/tracing/decorators.py +1 -1
  332. synth_ai/v0/tracing/upload.py +1 -1
  333. synth_ai/v0/tracing_v1/config.py +1 -1
  334. synth_ai/v0/tracing_v1/decorators.py +1 -1
  335. synth_ai/v0/tracing_v1/upload.py +1 -1
  336. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/METADATA +91 -32
  337. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/RECORD +341 -154
  338. synth_ai/cli/man.py +0 -106
  339. synth_ai/cli/tui.py +0 -57
  340. synth_ai/compound/cais.py +0 -0
  341. synth_ai/core/experiment.py +0 -13
  342. synth_ai/core/system.py +0 -15
  343. synth_ai/demo_registry.py +0 -295
  344. synth_ai/handshake.py +0 -109
  345. synth_ai/tui/__init__.py +0 -5
  346. synth_ai/tui/__main__.py +0 -13
  347. synth_ai/tui/cli/__init__.py +0 -1
  348. synth_ai/tui/cli/query_experiments.py +0 -164
  349. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  350. synth_ai/tui/dashboard.py +0 -906
  351. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/WHEEL +0 -0
  352. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/entry_points.txt +0 -0
  353. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/licenses/LICENSE +0 -0
  354. {synth_ai-0.2.14.dist-info → synth_ai-0.2.17.dist-info}/top_level.txt +0 -0
examples/qwen_vl/RL_VISION_TESTING.md
@@ -0,0 +1,333 @@
# Vision RL Integration Testing

Complete integration tests for reinforcement learning with vision-language models using the Crafter task app.

## Overview

These tests verify the full vision RL pipeline:
1. **Task App**: Same Crafter task app used for SFT data collection (generates image observations)
2. **Model**: Qwen3-VL-4B (smaller and faster for testing)
3. **Policy**: Uses `image_only_mode=true` - the agent sees only images, no text observations
4. **Training**: Full RL (GRPO/GSPO) with a vision-capable model

## Files

### Configs
- `configs/crafter_rl_vision_qwen3vl4b.toml` - Full RL config for Qwen3-VL-4B with vision

### Tests
- `../../tests/integration/cli/test_cli_train_rl_vision.py` - Integration tests:
  - `test_cli_train_rl_vision_qwen3vl4b` - Full RL training test
  - `test_task_app_vision_support` - Task app vision capability test

## Quick Start

### 1. Prerequisites

```bash
# Required environment variables
export SYNTH_API_KEY="your-api-key"
export BACKEND_BASE_URL="https://agent-learning.onrender.com/api"  # or your backend
export ENVIRONMENT_API_KEY="your-modal-key"  # For Modal deployment

# Optional: for faster testing
export TASK_APP_WARMUP_TIMEOUT=300  # 5 min for vision models
export SYNTH_TRAIN_TEST_POLL_TIMEOUT=180
```
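Before launching anything, a quick preflight check catches missing variables early. A minimal sketch (illustrative helper, not part of the test suite):

```python
import os

# Fail fast if any required variable above is unset (sketch only).
required = ["SYNTH_API_KEY", "BACKEND_BASE_URL", "ENVIRONMENT_API_KEY"]
missing = [name for name in required if not os.environ.get(name)]
if missing:
    raise SystemExit(f"Missing environment variables: {', '.join(missing)}")
```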
### 2. Run Tests

```bash
cd /Users/joshpurtell/Documents/GitHub/synth-ai

# Run all vision RL tests
uv run pytest tests/integration/cli/test_cli_train_rl_vision.py -v -s

# Run specific test
uv run pytest tests/integration/cli/test_cli_train_rl_vision.py::test_cli_train_rl_vision_qwen3vl4b -v -s

# Run with marks
uv run pytest -m "vision and slow" -v -s
```

### 3. Manual RL Training (without pytest)

```bash
# 1. Deploy task app (if not already deployed)
uvx synth-ai task-app deploy grpo-crafter --name grpo-crafter-task-app

# 2. Get task app URL (from deploy output)
export TASK_APP_URL="https://your-app.modal.run"

# 3. Run RL training
uvx synth-ai train \
  --type rl \
  --config examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml \
  --backend $BACKEND_BASE_URL \
  --task-url $TASK_APP_URL
```

## Configuration Details

### Model: Qwen3-VL-4B
```toml
[model]
base = "Qwen/Qwen3-VL-4B-Instruct"
trainer_mode = "lora"
supports_vision = true  # Enable vision support
```

### Vision-Specific Settings
```toml
[vllm]
limit_mm_per_prompt = { "image": 1 }  # Max 1 image per prompt

[rollout.policy_config]
use_vision = true  # Enable vision input
image_only_mode = true  # Use only images, no text observations
temperature = 0.6
max_tokens = 512

[training]
batch_size = 2  # Smaller for vision models (memory)
max_images_per_message = 1
supports_vision = true
```
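With `use_vision = true` and `image_only_mode = true`, each policy turn sends the current frame as an OpenAI-style multimodal message with no text observation, and `limit_mm_per_prompt` caps it at one image. A sketch of the shape (illustrative only; the task app constructs the real payload):

```python
# Hypothetical single-turn payload under image_only_mode (sketch only).
user_message = {
    "role": "user",
    "content": [
        {
            "type": "image_url",
            "image_url": {"url": "data:image/png;base64,<frame-bytes>"},
        }
    ],
}
```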
### GPU Allocation (2x H200)
```toml
[topology]
gpus_for_vllm = 1  # Inference
gpus_for_training = 1  # Training
tensor_parallel = 1
```

## Test Details

### Test 1: Full RL Training
**Function:** `test_cli_train_rl_vision_qwen3vl4b`

**What it tests:**
1. Task app deployment
2. Task app warmup (health check)
3. RL job submission with vision config
4. Job creation confirmation

**Expected output:**
```
✅ Vision RL job created: job-abc123
   Model: Qwen3-VL-4B
   Task App: https://your-app.modal.run
   Image Mode: image_only
```

**Runtime:** ~5-10 minutes (deploy + warmup + job submit)

### Test 2: Task App Vision Support
**Function:** `test_task_app_vision_support`

**What it tests:**
1. Task app can be deployed
2. Task app health endpoint responds
3. Task app accepts vision policy config
4. A rollout request with `use_vision=true` and `image_only_mode=true` succeeds

**Expected output:**
```
✅ Task app supports vision config
   Response keys: ['trajectory', 'metadata', ...]
```

**Runtime:** ~2-3 minutes (deploy + warmup + single rollout)

## Task App Details

The Crafter task app (`grpo-crafter-task-app`) provides:

### Environment
- **Crafter game** with visual observations
- Generates RGB images (64x64 or configurable)
- Text observations also available (but ignored in `image_only_mode`)

### Policy (crafter-react)
- **Vision Detection:** Auto-detects vision models from the name (e.g., "Qwen3-VL", "gpt-4o-mini"); see the sketch below
- **Image Formatting:** Converts observations to OpenAI-style multimodal messages
- **Tool Calling:** Supports a structured action space via tools
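A sketch of that kind of name-based detection (illustrative; the actual rule lives in the task app's policy implementation):

```python
def looks_like_vision_model(model_name: str) -> bool:
    """Heuristic name check (sketch; not the task app's exact logic)."""
    name = model_name.lower()
    return any(marker in name for marker in ("-vl", "vision", "gpt-4o"))
```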
### Trace Format
- **Structured traces** with multimodal messages
- Images stored as base64 in the trace DB (see the decoding sketch below)
- Compatible with `synth-ai filter` for SFT export
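Since frames are stored as data URLs, recovering the raw image bytes from a trace needs only the standard library. A sketch assuming the `data:image/png;base64,...` format:

```python
import base64

def decode_frame(data_url: str) -> bytes:
    """Extract raw image bytes from a data URL (illustrative helper)."""
    header, _, payload = data_url.partition(",")
    if not header.startswith("data:image/"):
        raise ValueError("not an image data URL")
    return base64.b64decode(payload)
```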
## Integration with SFT Pipeline

This RL setup uses the **same task app** as the SFT data collection:

### SFT Data Collection
```bash
# Collect episodes with gpt-4o-mini teacher
uvx synth-ai eval --config configs/eval_gpt4o_vision_proper.toml

# Export to SFT dataset
uvx synth-ai filter --config configs/filter_vision_sft.toml
```

### RL Training
```bash
# Train student model (Qwen3-VL-4B) with RL
uvx synth-ai train \
  --type rl \
  --config configs/crafter_rl_vision_qwen3vl4b.toml
```

**Benefits:**
1. **Consistency:** Same environment, same observations
2. **Curriculum:** SFT → RL progression
3. **Debugging:** Compare SFT and RL traces in the same format

## Troubleshooting

### Task App Deployment Fails
```bash
# Check Modal auth
modal token set --token-id <id> --token-secret <secret>

# Check environment variables
echo $SYNTH_API_KEY
echo $ENVIRONMENT_API_KEY

# Try manual deploy
uvx synth-ai task-app deploy grpo-crafter --name grpo-crafter-task-app
```

### Task App Won't Warm Up
```bash
# Increase timeout
export TASK_APP_WARMUP_TIMEOUT=600  # 10 minutes

# Check task app logs in the Modal dashboard:
# https://modal.com/apps

# Try health check manually
curl https://your-app.modal.run/health
```

### RL Job Submission Fails
```bash
# Check backend connectivity
curl $BACKEND_BASE_URL/health

# Verify API key
curl -H "Authorization: Bearer $SYNTH_API_KEY" $BACKEND_BASE_URL/api/health

# Check task app URL format
echo $TASK_APP_URL  # Should be https://...modal.run
```

### Vision Model OOM (Out of Memory)
```toml
# Reduce batch size in config
[training]
batch_size = 1  # Down from 2
gradient_accumulation_steps = 4  # Up from 2

# Reduce concurrent rollouts
[rollout]
max_concurrent_rollouts = 2  # Down from 4
```

### Images Not Appearing in Training
```bash
# Verify vision support is enabled
grep -A 5 "\[model\]" configs/crafter_rl_vision_qwen3vl4b.toml
# Should show: supports_vision = true

# Check policy config
grep -A 10 "\[rollout.policy_config\]" configs/crafter_rl_vision_qwen3vl4b.toml
# Should show: use_vision = true, image_only_mode = true

# Verify vLLM config
grep -A 3 "\[vllm\]" configs/crafter_rl_vision_qwen3vl4b.toml
# Should show: limit_mm_per_prompt = { "image": 1 }
```

## Performance Expectations

### Qwen3-VL-4B (2x H200)
- **Throughput:** ~2-4 episodes/min (with TP=1)
- **Memory:** ~40-60 GB GPU (model + images + gradients)
- **Iteration Time:** ~10-15 min (with 4 episodes, 10 steps each)

### Training Time Estimates
- **3 iterations (test):** ~30-45 minutes
- **10 iterations (short run):** ~2-3 hours
- **50 iterations (full run):** ~12-20 hours

## Next Steps

### 1. Baseline Evaluation
```bash
# Evaluate untrained model
uvx synth-ai eval \
  --model Qwen/Qwen3-VL-4B-Instruct \
  --env crafter \
  --seeds 0,1,2,3,4 \
  --policy-config '{"use_vision": true, "image_only_mode": true}'
```

### 2. SFT Initialization (Optional)
```bash
# Train on teacher demonstrations first
uvx synth-ai train \
  --type sft \
  --model Qwen/Qwen3-VL-4B-Instruct \
  --data traces/gpt4o_vision/sft/train.jsonl
```

### 3. RL Fine-Tuning
```bash
# Run full RL training
uvx synth-ai train \
  --type rl \
  --config configs/crafter_rl_vision_qwen3vl4b.toml \
  --iterations 50
```

### 4. Eval Comparison
```bash
# Compare pre-trained vs post-RL
uvx synth-ai eval --model <rl-checkpoint> --seeds 0-9
```

## References

- **VLM SFT Pipeline:** `examples/qwen_vl/PIPELINE_RUN_LOG.txt`
- **Image Validation:** `examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md`
- **Task App Source:** `examples/task_apps/crafter/task_app/`
- **Policy Implementation:** `examples/task_apps/crafter/task_app/synth_envs_hosted/policy.py`

## CI Integration

### Pytest Marks
```python
@pytest.mark.slow         # Takes >5 minutes
@pytest.mark.vision       # Requires vision model support
@pytest.mark.integration  # Full pipeline test
```
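Combined, a vision RL test is typically declared with the full decorator stack (a sketch; the body is elided, see "Test 1: Full RL Training" above for the actual steps):

```python
import pytest

@pytest.mark.slow
@pytest.mark.vision
@pytest.mark.integration
def test_cli_train_rl_vision_qwen3vl4b():
    # Deploy the task app, wait for warmup, submit the RL job,
    # then assert the job was created.
    ...
```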
### Run in CI
```bash
# Run all integration tests including vision
pytest tests/integration/cli/ -m integration -v

# Run only vision tests
pytest -m vision -v

# Skip slow tests for PR checks
pytest -m "not slow" -v
```

---

**Status:** ✅ Integration tests ready. Task app and RL config validated for Qwen3-VL-4B with image-only observations.
examples/qwen_vl/SDK_VISION_INTEGRATION.md
@@ -0,0 +1,328 @@
# SDK Vision Support Integration

**Status**: ✅ Complete

## Overview

Added comprehensive vision/multimodal support to the synth-ai SDK's SFT data module and integrated it with the monorepo backend for consistent multimodal data handling across both codebases.

## Changes Made

### 1. **SDK Enhancement** (`synth-ai/synth_ai/learning/sft/data.py`)

Added vision-specific utilities to the SDK:

#### New Functions

1. **`has_image_content(content: SFTMessageContent) -> bool`**
   - Detects if message content contains images
   - Supports the OpenAI multimodal format
   - Handles both `{"type": "image_url"}` and `{"type": "image"}` formats

2. **`message_has_image(message: SFTMessage) -> bool`**
   - Checks if an SFTMessage contains image content
   - Convenience wrapper around `has_image_content`

3. **`example_has_image(example: SFTExample) -> bool`**
   - Checks if any message in an SFTExample contains images
   - Used for filtering vision datasets

4. **`count_images_in_content(content: SFTMessageContent) -> int`**
   - Counts the number of image segments in message content
   - Useful for statistics and validation

5. **`extract_image_urls(content: SFTMessageContent) -> list[str]`**
   - Extracts all image URLs from message content
   - Supports http(s):// URLs and data:image/... base64
   - Returns a list of URL strings

6. **`validate_vision_example(example: SFTExample, *, require_images: bool = True) -> tuple[bool, str | None]`**
   - Comprehensive validation of vision SFT examples
   - Checks for image presence and URL validity
   - Returns an `(is_valid, error_message)` tuple
   - Logs warnings for suspicious URLs

7. **`iter_vision_examples(...) -> Iterator[SFTExample]`**
   - Specialized iterator for vision examples
   - Includes vision-specific validation
   - Option to require images or skip invalid examples
   - Useful for processing large JSONL files

#### Example Usage

```python
from synth_ai.learning.sft.data import (
    load_jsonl,
    example_has_image,
    validate_vision_example,
    extract_image_urls,
)

# Load and filter vision examples
examples = load_jsonl("vision_data.jsonl")
vision_examples = [ex for ex in examples if example_has_image(ex)]

# Validate each example
for ex in vision_examples:
    is_valid, error = validate_vision_example(ex)
    if not is_valid:
        print(f"Invalid: {error}")

    # Extract image URLs for inspection
    for msg in ex.messages:
        urls = extract_image_urls(msg.content)
        print(f"Images: {urls}")
```
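The per-message helpers compose the same way. Continuing the snippet above (a short sketch using the functions listed under New Functions):

```python
from synth_ai.learning.sft.data import count_images_in_content, message_has_image

# Dataset-level statistics built from the helpers above (sketch).
total_images = sum(
    count_images_in_content(msg.content)
    for ex in vision_examples
    for msg in ex.messages
)
multimodal_turns = [
    msg for ex in vision_examples for msg in ex.messages if message_has_image(msg)
]
print(f"{total_images} images across {len(multimodal_turns)} multimodal messages")
```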
### 2. **Backend Integration** (`monorepo/backend/.../training/sft/data.py`)

Updated the monorepo backend to use the SDK utilities:

#### Changes

1. **Added SDK imports with a fallback**:
   ```python
   try:
       from synth_ai.learning.sft.data import (
           has_image_content as sdk_has_image_content,
           example_has_image as sdk_example_has_image,
           validate_vision_example as sdk_validate_vision_example,
           # ... more imports
       )
       SDK_VISION_AVAILABLE = True
   except ImportError:
       SDK_VISION_AVAILABLE = False
       logger.warning("synth_ai SDK not available - vision support will be limited")
   ```

2. **Updated the `SFTDataProcessor` docstring**:
   - Documents integration with the SDK
   - Shows an OpenAI multimodal format example
   - Explains the fallback behavior

3. **Enhanced the `_vision_message_has_image()` method**:
   - Uses the SDK's `has_image_content()` when available
   - Falls back to a local implementation if the SDK is unavailable
   - Ensures consistency between SDK and backend

4. **Enhanced the `_validate_vision_examples()` method**:
   - Uses the SDK's `coerce_example()` and `validate_vision_example()` for the messages format
   - Provides comprehensive validation with detailed error messages
   - Falls back gracefully if SDK validation fails
   - Maintains backward compatibility with non-messages formats

## Supported Data Formats

### OpenAI Multimodal Format (Recommended)

```json
{
  "messages": [
    {
      "role": "system",
      "content": "You are a helpful assistant."
    },
    {
      "role": "user",
      "content": [
        {"type": "text", "text": "What's in this image?"},
        {"type": "image_url", "image_url": {"url": "data:image/png;base64,iVBORw0KGgo..."}}
      ]
    },
    {
      "role": "assistant",
      "content": "I see a cat sitting on a couch."
    }
  ],
  "metadata": {
    "session_id": "ep001",
    "has_image": true
  }
}
```

### Alternative Formats (Also Supported)

**Legacy image field**:
```json
{
  "messages": [...],
  "images": ["/path/to/image.jpg"],
  "metadata": {}
}
```

**Single image field**:
```json
{
  "messages": [...],
  "image": "https://example.com/image.jpg",
  "metadata": {}
}
```

## Image URL Formats

Supported image URL formats:

1. **HTTP(S) URLs**: `https://example.com/image.jpg`
2. **Data URLs (base64)**: `data:image/png;base64,iVBORw0KGgo...`
3. **Local file paths**: `/path/to/image.jpg` (for local training only; see the encoding sketch below)
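For the local-path case, one way to turn a file into a data URL is a small standard-library helper (a sketch, not part of the SDK; batch base64 helpers are listed under Future Enhancements below):

```python
import base64
from pathlib import Path

def to_data_url(path: str, mime: str = "image/png") -> str:
    """Encode a local image file as a data URL (illustrative helper)."""
    payload = base64.b64encode(Path(path).read_bytes()).decode("ascii")
    return f"data:{mime};base64,{payload}"
```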
## Validation Rules

The SDK validates the following (a filtering sketch follows the list):

1. **Image presence**: At least one message must contain an image (when `require_images=True`)
2. **URL format**: All image URLs must be non-empty strings
3. **URL scheme**: URLs should start with `http://`, `https://`, or `data:image/`
   - Warnings are logged for non-standard formats
4. **Message structure**: Messages must follow the OpenAI format
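Applied over a whole dataset, these rules support a simple keep/reject pass (a sketch using the `(is_valid, error_message)` tuple described above):

```python
from synth_ai.learning.sft.data import validate_vision_example

# Partition examples by the validation rules above (sketch).
kept, rejected = [], []
for ex in vision_examples:
    ok, err = validate_vision_example(ex)
    if ok:
        kept.append(ex)
    else:
        rejected.append((ex, err))
print(f"kept {len(kept)}, rejected {len(rejected)}")
```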
## Benefits

### 1. **Consistency**
- Single source of truth for vision data validation
- Both SDK and backend use the same logic
- Reduces bugs and maintenance burden

### 2. **Type Safety**
- Strong typing with dataclasses
- Clear SFTMessage and SFTExample structures
- IDE autocomplete and type checking

### 3. **Error Handling**
- Comprehensive validation with detailed error messages
- Graceful fallbacks if the SDK is unavailable
- Helpful warnings for edge cases

### 4. **OpenAI Compatibility**
- Matches OpenAI's fine-tuning format exactly
- Data can be used with OpenAI or local models
- Easy migration between platforms

### 5. **Tool Call Support**
- The SDK already handles tool calls and tool definitions
- Ready for complex agentic workflows
- Supports reasoning blocks (`<think>` tags) if needed

## Testing

### Quick SDK Test

```python
# Test in the synth-ai repo
from synth_ai.learning.sft.data import has_image_content, validate_vision_example, coerce_example

# Test multimodal message detection
content = [
    {"type": "text", "text": "What's this?"},
    {"type": "image_url", "image_url": {"url": "data:image/png;base64,abc123"}}
]
assert has_image_content(content)

# Test validation
example_data = {
    "messages": [
        {"role": "user", "content": content},
        {"role": "assistant", "content": "A test image"}
    ]
}
example = coerce_example(example_data)
is_valid, error = validate_vision_example(example)
assert is_valid, error
print("✓ SDK vision utilities working correctly!")
```

### Integration Test

```python
# Test in the monorepo backend
from backend.app.routes.simple_training.training.sft.data import SFTDataProcessor

processor = SFTDataProcessor()
test_data = [{
    "messages": [
        {"role": "user", "content": [
            {"type": "text", "text": "Describe this."},
            {"type": "image_url", "image_url": {"url": "data:image/png;base64,..."}}
        ]},
        {"role": "assistant", "content": "Description"}
    ]
}]

validated = processor._validate_vision_examples(test_data)
assert len(validated) == 1
print("✓ Backend SDK integration working!")
```

## Future Enhancements

### Potential Additions

1. **Image preprocessing utilities**
   - Resize images to model requirements
   - Validate image dimensions
   - Convert between formats (JPEG ↔ PNG)

2. **Base64 encoding helpers**
   - Convert file paths to data URLs
   - Batch encode images for JSONL
   - Memory-efficient streaming

3. **Statistics and analytics**
   - Count images per example
   - Measure average image sizes
   - Detect corrupted or invalid images

4. **Dataset transformation**
   - Convert between formats
   - Augment with additional images
   - Filter by image properties

## Migration Guide

### For Existing Backend Code

If you have existing vision validation code:

```python
# Before (manual validation)
def has_images(messages):
    for msg in messages:
        content = msg.get("content")
        if isinstance(content, list):
            for part in content:
                if part.get("type") == "image_url":
                    return True
    return False

# After (use the SDK)
from synth_ai.learning.sft.data import has_image_content

def has_images(messages):
    return any(has_image_content(msg.get("content")) for msg in messages)
```

### For Existing SDK Code

No changes needed! The SDK already handles OpenAI message formats correctly. The vision utilities are additive and don't break existing functionality.

## Documentation

- **SDK docs**: See the `synth_ai/learning/sft/data.py` docstrings
- **Backend docs**: See the `backend/app/routes/simple_training/training/sft/data.py` class docstring
- **Examples**: See `synth-ai/examples/qwen_vl/` for vision-specific examples

## Related Files

- SDK: `synth-ai/synth_ai/learning/sft/data.py`
- Backend: `monorepo/backend/app/routes/simple_training/training/sft/data.py`
- Examples: `synth-ai/examples/qwen_vl/`
- Pipeline guide: `synth-ai/examples/qwen_vl/NEXT_STEPS_2B.md`

---

✅ **SDK vision support is now production-ready for both synth-ai and the monorepo!**