synth-ai 0.2.13.dev2__py3-none-any.whl → 0.2.16__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (293)
  1. examples/README.md +1 -0
  2. examples/multi_step/SFT_README.md +147 -0
  3. examples/multi_step/configs/README_verilog_rl.md +77 -0
  4. examples/multi_step/configs/VERILOG_REWARDS.md +90 -0
  5. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +183 -0
  6. examples/multi_step/configs/crafter_eval_synth_qwen4b.toml +35 -0
  7. examples/multi_step/configs/crafter_eval_text_only_groq_qwen32b.toml +36 -0
  8. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +12 -11
  9. examples/multi_step/configs/crafter_sft_qwen30b_lora.toml +62 -0
  10. examples/multi_step/configs/crafter_synth_backend.md +40 -0
  11. examples/multi_step/configs/verilog_eval_groq_qwen32b.toml +31 -0
  12. examples/multi_step/configs/verilog_eval_synth_qwen8b.toml +33 -0
  13. examples/multi_step/configs/verilog_rl_lora.toml +190 -0
  14. examples/multi_step/convert_traces_to_sft.py +84 -0
  15. examples/multi_step/judges/crafter_backend_judge.py +220 -0
  16. examples/multi_step/judges/verilog_backend_judge.py +234 -0
  17. examples/multi_step/readme.md +48 -0
  18. examples/multi_step/run_sft_qwen30b.sh +45 -0
  19. examples/multi_step/verilog_rl_lora.md +218 -0
  20. examples/qwen_coder/configs/coder_lora_30b.toml +3 -2
  21. examples/qwen_coder/configs/coder_lora_4b.toml +2 -1
  22. examples/qwen_coder/configs/coder_lora_small.toml +2 -1
  23. examples/qwen_vl/BUGS_AND_FIXES.md +232 -0
  24. examples/qwen_vl/IMAGE_VALIDATION_COMPLETE.md +271 -0
  25. examples/qwen_vl/IMAGE_VALIDATION_SUMMARY.md +260 -0
  26. examples/qwen_vl/INFERENCE_SFT_TESTS.md +412 -0
  27. examples/qwen_vl/NEXT_STEPS_2B.md +325 -0
  28. examples/qwen_vl/QUICKSTART.md +327 -0
  29. examples/qwen_vl/QUICKSTART_RL_VISION.md +110 -0
  30. examples/qwen_vl/README.md +154 -0
  31. examples/qwen_vl/RL_VISION_COMPLETE.md +475 -0
  32. examples/qwen_vl/RL_VISION_TESTING.md +333 -0
  33. examples/qwen_vl/SDK_VISION_INTEGRATION.md +328 -0
  34. examples/qwen_vl/SETUP_COMPLETE.md +275 -0
  35. examples/qwen_vl/VISION_TESTS_COMPLETE.md +490 -0
  36. examples/qwen_vl/VLM_PIPELINE_COMPLETE.md +242 -0
  37. examples/qwen_vl/__init__.py +2 -0
  38. examples/qwen_vl/collect_data_via_cli.md +423 -0
  39. examples/qwen_vl/collect_vision_traces.py +368 -0
  40. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +127 -0
  41. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +60 -0
  42. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +43 -0
  43. examples/qwen_vl/configs/eval_gpt4o_vision_proper.toml +29 -0
  44. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +45 -0
  45. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +44 -0
  46. examples/qwen_vl/configs/filter_qwen2vl_sft.toml +50 -0
  47. examples/qwen_vl/configs/filter_vision_sft.toml +53 -0
  48. examples/qwen_vl/configs/filter_vision_test.toml +8 -0
  49. examples/qwen_vl/configs/sft_qwen3_vl_2b_test.toml +54 -0
  50. examples/qwen_vl/crafter_gpt5nano_agent.py +308 -0
  51. examples/qwen_vl/crafter_qwen_vl_agent.py +300 -0
  52. examples/qwen_vl/run_vision_comparison.sh +62 -0
  53. examples/qwen_vl/run_vision_sft_pipeline.sh +175 -0
  54. examples/qwen_vl/test_image_validation.py +201 -0
  55. examples/qwen_vl/test_sft_vision_data.py +110 -0
  56. examples/rl/README.md +1 -1
  57. examples/rl/configs/eval_base_qwen.toml +17 -0
  58. examples/rl/configs/eval_rl_qwen.toml +13 -0
  59. examples/rl/configs/rl_from_base_qwen.toml +37 -0
  60. examples/rl/configs/rl_from_base_qwen17.toml +76 -0
  61. examples/rl/configs/rl_from_ft_qwen.toml +37 -0
  62. examples/rl/run_eval.py +436 -0
  63. examples/rl/run_rl_and_save.py +111 -0
  64. examples/rl/task_app/README.md +22 -0
  65. examples/rl/task_app/math_single_step.py +990 -0
  66. examples/rl/task_app/math_task_app.py +111 -0
  67. examples/sft/README.md +5 -5
  68. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -2
  69. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -3
  70. examples/sft/evaluate.py +4 -4
  71. examples/sft/export_dataset.py +7 -4
  72. examples/sft/generate_traces.py +2 -0
  73. examples/swe/task_app/README.md +1 -1
  74. examples/swe/task_app/grpo_swe_mini.py +1 -1
  75. examples/swe/task_app/grpo_swe_mini_task_app.py +0 -12
  76. examples/swe/task_app/hosted/envs/mini_swe/environment.py +13 -13
  77. examples/swe/task_app/hosted/policy_routes.py +0 -2
  78. examples/swe/task_app/hosted/rollout.py +2 -8
  79. examples/task_apps/IMAGE_ONLY_EVAL_QUICKSTART.md +258 -0
  80. examples/task_apps/crafter/CREATE_SFT_DATASET.md +273 -0
  81. examples/task_apps/crafter/EVAL_IMAGE_ONLY_RESULTS.md +152 -0
  82. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +174 -0
  83. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +268 -0
  84. examples/task_apps/crafter/QUERY_EXAMPLES.md +203 -0
  85. examples/task_apps/crafter/README_IMAGE_ONLY_EVAL.md +316 -0
  86. examples/task_apps/crafter/eval_image_only_gpt4o.toml +28 -0
  87. examples/task_apps/crafter/eval_text_only_groq_llama.toml +36 -0
  88. examples/task_apps/crafter/filter_sft_dataset.toml +16 -0
  89. examples/task_apps/crafter/task_app/__init__.py +3 -0
  90. examples/task_apps/crafter/task_app/grpo_crafter.py +309 -14
  91. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/environment.py +10 -0
  92. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +75 -4
  93. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +17 -2
  94. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +55 -3
  95. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +114 -32
  96. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +127 -27
  97. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +156 -0
  98. examples/task_apps/enron/__init__.py +1 -0
  99. examples/task_apps/enron/filter_sft.toml +5 -0
  100. examples/task_apps/enron/tests/__init__.py +2 -0
  101. examples/task_apps/enron/tests/integration/__init__.py +2 -0
  102. examples/task_apps/enron/tests/integration/test_enron_eval.py +2 -0
  103. examples/task_apps/enron/tests/unit/__init__.py +2 -0
  104. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_COMPLETE.md +283 -0
  105. examples/task_apps/pokemon_red/EVAL_IMAGE_ONLY_STATUS.md +155 -0
  106. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +415 -0
  107. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +29 -0
  108. examples/task_apps/pokemon_red/pallet_town_rl_config.toml +2 -0
  109. examples/task_apps/pokemon_red/task_app.py +199 -6
  110. examples/task_apps/pokemon_red/test_pallet_town_rewards.py +2 -0
  111. examples/task_apps/sokoban/filter_sft.toml +5 -0
  112. examples/task_apps/sokoban/tests/__init__.py +2 -0
  113. examples/task_apps/sokoban/tests/integration/__init__.py +2 -0
  114. examples/task_apps/sokoban/tests/unit/__init__.py +2 -0
  115. examples/task_apps/verilog/eval_groq_qwen32b.toml +8 -4
  116. examples/task_apps/verilog/filter_sft.toml +5 -0
  117. examples/task_apps/verilog/task_app/grpo_verilog.py +258 -23
  118. examples/task_apps/verilog/tests/__init__.py +2 -0
  119. examples/task_apps/verilog/tests/integration/__init__.py +2 -0
  120. examples/task_apps/verilog/tests/integration/test_verilog_eval.py +2 -0
  121. examples/task_apps/verilog/tests/unit/__init__.py +2 -0
  122. examples/vlm/README.md +3 -3
  123. examples/vlm/configs/crafter_vlm_gpt4o.toml +2 -0
  124. examples/vlm/crafter_openai_vlm_agent.py +3 -5
  125. examples/vlm/filter_image_rows.py +1 -1
  126. examples/vlm/run_crafter_vlm_benchmark.py +2 -2
  127. examples/warming_up_to_rl/_utils.py +92 -0
  128. examples/warming_up_to_rl/analyze_trace_db.py +1 -1
  129. examples/warming_up_to_rl/configs/crafter_fft.toml +2 -0
  130. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +2 -0
  131. examples/warming_up_to_rl/configs/eval_fft_qwen4b.toml +2 -0
  132. examples/warming_up_to_rl/configs/eval_groq_qwen32b.toml +2 -0
  133. examples/warming_up_to_rl/configs/eval_modal_qwen4b.toml +2 -1
  134. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +2 -1
  135. examples/warming_up_to_rl/configs/rl_from_ft.toml +2 -0
  136. examples/warming_up_to_rl/export_trace_sft.py +174 -60
  137. examples/warming_up_to_rl/groq_test.py +2 -0
  138. examples/warming_up_to_rl/readme.md +63 -132
  139. examples/warming_up_to_rl/run_fft_and_save.py +1 -1
  140. examples/warming_up_to_rl/run_local_rollout.py +2 -0
  141. examples/warming_up_to_rl/run_local_rollout_modal.py +2 -0
  142. examples/warming_up_to_rl/run_local_rollout_parallel.py +2 -0
  143. examples/warming_up_to_rl/run_local_rollout_traced.py +2 -0
  144. examples/warming_up_to_rl/run_rl_and_save.py +1 -1
  145. examples/warming_up_to_rl/run_rollout_remote.py +2 -0
  146. examples/warming_up_to_rl/task_app/README.md +42 -0
  147. examples/warming_up_to_rl/task_app/grpo_crafter.py +696 -0
  148. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +135 -0
  149. examples/warming_up_to_rl/task_app/synth_envs_hosted/README.md +173 -0
  150. examples/warming_up_to_rl/task_app/synth_envs_hosted/__init__.py +5 -0
  151. examples/warming_up_to_rl/task_app/synth_envs_hosted/branching.py +143 -0
  152. examples/warming_up_to_rl/task_app/synth_envs_hosted/environment_routes.py +1226 -0
  153. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/__init__.py +1 -0
  154. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/__init__.py +6 -0
  155. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/app.py +1 -0
  156. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/environment.py +522 -0
  157. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +478 -0
  158. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +108 -0
  159. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/shared.py +305 -0
  160. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/tools.py +47 -0
  161. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +204 -0
  162. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/__init__.py +5 -0
  163. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +618 -0
  164. examples/warming_up_to_rl/task_app/synth_envs_hosted/main.py +100 -0
  165. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +1081 -0
  166. examples/warming_up_to_rl/task_app/synth_envs_hosted/registry.py +195 -0
  167. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +1861 -0
  168. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/__init__.py +5 -0
  169. examples/warming_up_to_rl/task_app/synth_envs_hosted/storage/volume.py +211 -0
  170. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_agents.py +161 -0
  171. examples/warming_up_to_rl/task_app/synth_envs_hosted/test_service.py +137 -0
  172. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +62 -0
  173. synth_ai/__init__.py +44 -30
  174. synth_ai/_utils/__init__.py +47 -0
  175. synth_ai/_utils/base_url.py +10 -0
  176. synth_ai/_utils/http.py +10 -0
  177. synth_ai/_utils/prompts.py +10 -0
  178. synth_ai/_utils/task_app_state.py +12 -0
  179. synth_ai/_utils/user_config.py +10 -0
  180. synth_ai/api/models/supported.py +145 -7
  181. synth_ai/api/train/__init__.py +13 -1
  182. synth_ai/api/train/cli.py +30 -7
  183. synth_ai/api/train/config_finder.py +18 -11
  184. synth_ai/api/train/env_resolver.py +13 -10
  185. synth_ai/cli/__init__.py +66 -49
  186. synth_ai/cli/_modal_wrapper.py +9 -6
  187. synth_ai/cli/_typer_patch.py +0 -2
  188. synth_ai/cli/_validate_task_app.py +22 -4
  189. synth_ai/cli/legacy_root_backup.py +3 -1
  190. synth_ai/cli/lib/__init__.py +10 -0
  191. synth_ai/cli/lib/task_app_discovery.py +7 -0
  192. synth_ai/cli/lib/task_app_env.py +518 -0
  193. synth_ai/cli/recent.py +1 -0
  194. synth_ai/cli/setup.py +266 -0
  195. synth_ai/cli/task_app_deploy.py +16 -0
  196. synth_ai/cli/task_app_list.py +25 -0
  197. synth_ai/cli/task_app_modal_serve.py +16 -0
  198. synth_ai/cli/task_app_serve.py +18 -0
  199. synth_ai/cli/task_apps.py +392 -141
  200. synth_ai/cli/train.py +18 -0
  201. synth_ai/cli/tui.py +62 -0
  202. synth_ai/demos/__init__.py +10 -0
  203. synth_ai/demos/core/__init__.py +28 -1
  204. synth_ai/demos/crafter/__init__.py +1 -0
  205. synth_ai/demos/crafter/crafter_fft_4b.toml +55 -0
  206. synth_ai/demos/crafter/grpo_crafter_task_app.py +185 -0
  207. synth_ai/demos/crafter/rl_from_base_qwen4b.toml +74 -0
  208. synth_ai/demos/demo_registry.py +176 -0
  209. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  210. synth_ai/demos/math/__init__.py +1 -0
  211. synth_ai/demos/math/_common.py +16 -0
  212. synth_ai/demos/math/app.py +38 -0
  213. synth_ai/demos/math/config.toml +76 -0
  214. synth_ai/demos/math/deploy_modal.py +54 -0
  215. synth_ai/demos/math/modal_task_app.py +702 -0
  216. synth_ai/demos/math/task_app_entry.py +51 -0
  217. synth_ai/environments/environment/core.py +7 -1
  218. synth_ai/environments/examples/bandit/engine.py +0 -1
  219. synth_ai/environments/examples/bandit/environment.py +0 -1
  220. synth_ai/environments/examples/crafter_classic/environment.py +1 -1
  221. synth_ai/environments/examples/verilog/engine.py +76 -10
  222. synth_ai/environments/examples/wordle/environment.py +0 -1
  223. synth_ai/evals/base.py +16 -5
  224. synth_ai/evals/client.py +1 -1
  225. synth_ai/inference/client.py +1 -1
  226. synth_ai/learning/client.py +1 -1
  227. synth_ai/learning/health.py +1 -1
  228. synth_ai/learning/jobs.py +1 -1
  229. synth_ai/learning/rl/client.py +1 -1
  230. synth_ai/learning/rl/env_keys.py +1 -1
  231. synth_ai/learning/rl/secrets.py +1 -1
  232. synth_ai/learning/sft/client.py +1 -1
  233. synth_ai/learning/sft/data.py +407 -4
  234. synth_ai/learning/validators.py +4 -1
  235. synth_ai/task/__init__.py +11 -1
  236. synth_ai/task/apps/__init__.py +5 -2
  237. synth_ai/task/config.py +259 -0
  238. synth_ai/task/contracts.py +15 -2
  239. synth_ai/task/rubrics/__init__.py +4 -2
  240. synth_ai/task/rubrics/loaders.py +27 -4
  241. synth_ai/task/rubrics/scoring.py +3 -0
  242. synth_ai/task/rubrics.py +219 -0
  243. synth_ai/task/trace_correlation_helpers.py +328 -0
  244. synth_ai/task/tracing_utils.py +14 -3
  245. synth_ai/task/validators.py +145 -2
  246. synth_ai/tracing_v3/config.py +15 -13
  247. synth_ai/tracing_v3/constants.py +21 -0
  248. synth_ai/tracing_v3/db_config.py +3 -1
  249. synth_ai/tracing_v3/decorators.py +10 -7
  250. synth_ai/tracing_v3/session_tracer.py +10 -0
  251. synth_ai/tracing_v3/turso/daemon.py +2 -2
  252. synth_ai/tracing_v3/turso/native_manager.py +108 -77
  253. synth_ai/tracing_v3/utils.py +1 -1
  254. synth_ai/tui/__init__.py +5 -0
  255. synth_ai/tui/__main__.py +13 -0
  256. synth_ai/tui/cli/__init__.py +1 -0
  257. synth_ai/tui/cli/query_experiments.py +164 -0
  258. synth_ai/tui/cli/query_experiments_v3.py +164 -0
  259. synth_ai/tui/dashboard.py +911 -0
  260. synth_ai/utils/__init__.py +101 -0
  261. synth_ai/utils/base_url.py +94 -0
  262. synth_ai/utils/cli.py +131 -0
  263. synth_ai/utils/env.py +287 -0
  264. synth_ai/utils/http.py +169 -0
  265. synth_ai/utils/modal.py +308 -0
  266. synth_ai/utils/process.py +212 -0
  267. synth_ai/utils/prompts.py +39 -0
  268. synth_ai/utils/sqld.py +122 -0
  269. synth_ai/utils/task_app_discovery.py +882 -0
  270. synth_ai/utils/task_app_env.py +186 -0
  271. synth_ai/utils/task_app_state.py +318 -0
  272. synth_ai/utils/user_config.py +137 -0
  273. synth_ai/v0/config/__init__.py +1 -5
  274. synth_ai/v0/config/base_url.py +1 -7
  275. synth_ai/v0/tracing/config.py +1 -1
  276. synth_ai/v0/tracing/decorators.py +1 -1
  277. synth_ai/v0/tracing/upload.py +1 -1
  278. synth_ai/v0/tracing_v1/config.py +1 -1
  279. synth_ai/v0/tracing_v1/decorators.py +1 -1
  280. synth_ai/v0/tracing_v1/upload.py +1 -1
  281. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/METADATA +85 -31
  282. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/RECORD +286 -135
  283. synth_ai/cli/man.py +0 -106
  284. synth_ai/compound/cais.py +0 -0
  285. synth_ai/core/experiment.py +0 -13
  286. synth_ai/core/system.py +0 -15
  287. synth_ai/demo_registry.py +0 -295
  288. synth_ai/handshake.py +0 -109
  289. synth_ai/http.py +0 -26
  290. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/WHEEL +0 -0
  291. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/entry_points.txt +0 -0
  292. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/licenses/LICENSE +0 -0
  293. {synth_ai-0.2.13.dev2.dist-info → synth_ai-0.2.16.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,201 @@
1
+ #!/usr/bin/env python3
2
+ """
3
+ Quick test script to demonstrate image validation.
4
+
5
+ Run from synth-ai root:
6
+ uv run python examples/qwen_vl/test_image_validation.py
7
+ """
8
+
9
+ from synth_ai.learning.sft.data import coerce_example, validate_vision_example
10
+
11
+ # Test cases
12
# Each case holds a display name, a raw SFT example ("data") and whether
# validation with require_images=True is expected to accept it ("should_pass").
test_cases = [
    {
        "name": "Valid - HTTP URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this"},
                        {"type": "image_url", "image_url": {"url": "https://example.com/image.jpg"}},
                    ],
                },
                {"role": "assistant", "content": "A beautiful image"},
            ]
        },
        "should_pass": True,
    },
    {
        "name": "Valid - Base64",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        # A real (1x1 PNG) base64 data URL, so this case actually
                        # exercises the base64 path its name refers to.
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": (
                                    "data:image/png;base64,"
                                    "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII="
                                )
                            },
                        },
                    ],
                },
                {"role": "assistant", "content": "An image"},
            ]
        },
        "should_pass": True,
    },
    {
        "name": "Invalid - Empty URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What's this?"},
                        {"type": "image_url", "image_url": {"url": ""}},  # Empty!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Missing URL field",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {}},  # No url field!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Null URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": None}},  # Null!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Whitespace URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": " "}},  # Whitespace!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Mixed valid and invalid",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": "https://example.com/valid.jpg"}},
                        {"type": "image_url", "image_url": {"url": ""}},  # One invalid!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
    {
        "name": "Invalid - Non-string URL",
        "data": {
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "image_url", "image_url": {"url": 12345}},  # Integer!
                    ],
                },
                {"role": "assistant", "content": "Response"},
            ]
        },
        "should_pass": False,
    },
]
137
+
138
+
139
def main():
    """Run every entry of ``test_cases`` through the SFT validators and print a report.

    Each case's ``data`` is passed to ``coerce_example`` and the result to
    ``validate_vision_example(..., require_images=True)``.  A case passes when
    the validation outcome matches its ``should_pass`` flag.  An exception
    raised by either call counts as a rejection, so it is a pass for cases
    expected to fail and a failure for cases expected to succeed.

    Returns:
        0 if every case behaved as expected, otherwise 1.
    """
    rule = "=" * 80
    print(rule)
    print("IMAGE VALIDATION TEST")
    print(rule)
    print()

    passed = 0
    failed = 0

    for test in test_cases:
        name = test["name"]
        data = test["data"]
        should_pass = test["should_pass"]

        try:
            example = coerce_example(data)
            is_valid, error = validate_vision_example(example, require_images=True)
        except Exception as exc:
            # Intentionally broad: any error while coercing or validating is
            # treated as the example being rejected.
            ok = not should_pass
            if ok:
                detail = f"Correctly raised exception: {exc}"
            else:
                detail = f"Unexpected exception: {exc}"
        else:
            ok = bool(is_valid) == bool(should_pass)
            if should_pass:
                detail = (
                    "Correctly accepted valid example"
                    if is_valid
                    else f"Should pass but got error: {error}"
                )
            else:
                detail = (
                    "Should fail but passed validation"
                    if is_valid
                    else f"Correctly rejected: {error}"
                )

        print(f"✅ PASS: {name}" if ok else f"❌ FAIL: {name}")
        print(f" → {detail}")
        if ok:
            passed += 1
        else:
            failed += 1

        print()

    print(rule)
    print(f"RESULTS: {passed}/{len(test_cases)} passed, {failed}/{len(test_cases)} failed")
    print(rule)

    if failed == 0:
        print("🎉 All tests passed!")
        return 0

    print(f"⚠️ {failed} test(s) failed")
    return 1
197
+
198
+
199
if __name__ == "__main__":
    # Raise SystemExit directly instead of calling the bare ``exit`` helper:
    # ``exit`` is injected by the ``site`` module for interactive use and is
    # absent when Python runs with -S or in some embedded/frozen builds.
    raise SystemExit(main())
201
+
@@ -0,0 +1,110 @@
1
+ """Generate test vision SFT dataset for Qwen3-VL-2B."""
2
+
3
+ import base64
4
+ import json
5
+ from pathlib import Path
6
+ from io import BytesIO
7
+
8
# Pillow is required to synthesize the test images; stop with status 1 if it
# is missing.
try:
    from PIL import Image
except ImportError:
    print("❌ PIL not available")
    # SystemExit rather than the ``site``-provided ``exit`` helper, which is
    # not guaranteed to exist outside interactive sessions.
    raise SystemExit(1)
13
+
14
+ BASE_DIR = Path(__file__).resolve().parent
15
+
16
def create_test_image(color: str) -> str:
    """Render a solid 64x64 PNG in the named color and return it as a base64 data URL.

    ``color`` must be one of "red", "blue", "green", "yellow" or "purple";
    any other name raises ``KeyError`` from the palette lookup.
    """
    palette = {
        "red": (255, 0, 0),
        "blue": (0, 0, 255),
        "green": (0, 255, 0),
        "yellow": (255, 255, 0),
        "purple": (128, 0, 128),
    }
    rgb = palette[color]

    # Encode the image into an in-memory buffer rather than a file on disk.
    png_bytes = BytesIO()
    Image.new('RGB', (64, 64), color=rgb).save(png_bytes, format='PNG')

    encoded = base64.b64encode(png_bytes.getvalue()).decode('utf-8')
    return "data:image/png;base64," + encoded
31
+
32
+
33
def main():
    """Write a small vision SFT dataset as JSONL and sanity-check it with the SDK.

    Creates ``test_data/vision_sft_test.jsonl`` next to this script with two
    examples per color (a one-word color identification and a short
    description), 10 examples in total.  If ``synth_ai`` is importable, the
    file is reloaded with ``load_jsonl`` and each example is checked with
    ``validate_vision_example(..., require_images=True)``; otherwise the
    validation step is skipped with a message.
    """
    output_dir = BASE_DIR / "test_data"
    output_dir.mkdir(parents=True, exist_ok=True)

    output_file = output_dir / "vision_sft_test.jsonl"

    examples = []
    colors = ["red", "blue", "green", "yellow", "purple"]

    for i, color in enumerate(colors):
        # Both examples for a color use the same picture, so encode it once.
        image_url = create_test_image(color)

        # Simple color identification
        examples.append({
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "What color is this image? Answer in one word."},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                },
                {
                    "role": "assistant",
                    "content": color.capitalize(),
                },
            ],
            "metadata": {"example_id": f"color_{i}", "type": "color_id"},
        })

        # Describe the image
        examples.append({
            "messages": [
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Describe this image briefly."},
                        {"type": "image_url", "image_url": {"url": image_url}},
                    ],
                },
                {
                    "role": "assistant",
                    "content": f"This is a {color} colored square image.",
                },
            ],
            "metadata": {"example_id": f"describe_{i}", "type": "description"},
        })

    # Write JSONL: one JSON object per line.
    with output_file.open("w", encoding="utf-8") as f:
        f.writelines(json.dumps(example) + "\n" for example in examples)

    print(f"✅ Created {len(examples)} vision SFT examples")
    print(f" Output: {output_file}")
    print(f" Size: {output_file.stat().st_size / 1024:.1f} KB")

    # Validate with SDK. This is best-effort: an ImportError raised anywhere in
    # this step only skips validation.
    try:
        from synth_ai.learning.sft.data import load_jsonl, validate_vision_example

        loaded = load_jsonl(output_file, min_messages=1)
        print(f" Loaded: {len(loaded)} examples")

        valid_count = 0
        for ex in loaded:
            is_valid, error = validate_vision_example(ex, require_images=True)
            if is_valid:
                valid_count += 1
            else:
                print(f" ⚠️ Invalid example: {error}")

        print(f" Valid: {valid_count}/{len(loaded)}")
    except ImportError:
        print(" (SDK validation skipped - synth_ai not available)")
107
+
108
+
109
+ if __name__ == "__main__":
110
+ main()
examples/rl/README.md CHANGED
@@ -52,7 +52,7 @@ uvx synth-ai serve math-single-step \
52
52
  --port 8101 \
53
53
  --env-file examples/rl/.env \
54
54
  --trace traces/math \
55
- --trace-db traces/math/synth_ai.db
55
+ --trace-db traces/math/task_app_traces_<timestamp>.db
56
56
  ```
57
57
 
58
58
  Deploy or serve on Modal using the same env file; the registration includes a `ModalDeploymentConfig` that installs the `datasets` package automatically.
@@ -0,0 +1,17 @@
1
+ type = "rl"
2
+
3
+ provider = "synth"
4
+ task_app_url = "http://localhost:8101"
5
+ model = "Qwen/Qwen3-1.7B"
6
+ split = "validation"
7
+ num_episodes = 50
8
+ seed_start = 0
9
+
10
+ [policy]
11
+ inference_url = "https://agent-learning.onrender.com/api/inference"
12
+ max_tokens = 128
13
+ temperature = 0.0
14
+
15
+ # Optionally supply custom headers
16
+ # [policy.headers]
17
+ # Authorization = "Bearer ..."
@@ -0,0 +1,13 @@
1
+ type = "rl"
2
+
3
+ provider = "synth"
4
+ task_app_url = "https://your-math-task.modal.run"
5
+ model = "rl:REPLACE_WITH_JOB_ID"
6
+ split = "test"
7
+ num_episodes = 200
8
+ seed_start = 100000
9
+
10
+ [policy]
11
+ inference_url = "https://your-inference-host"
12
+ max_tokens = 128
13
+ temperature = 0.0
@@ -0,0 +1,37 @@
1
+ type = "rl"
2
+
3
+ [services]
4
+ task_url = "https://your-math-task.modal.run"
5
+
6
+ [model]
7
+ base = "Qwen/Qwen3-4B"
8
+
9
+ [policy]
10
+ model = "Qwen/Qwen3-4B"
11
+ inference_url = "https://your-inference-host"
12
+ max_tokens = 128
13
+ temperature = 0.0
14
+
15
+ [data]
16
+ split = "train"
17
+ seed_start = 0
18
+ episodes_per_iteration = 2048
19
+ evaluation_split = "validation"
20
+ evaluation_episodes = 256
21
+
22
+ [training]
23
+ max_turns = 1
24
+ ops = ["agent", "env"]
25
+ batch_size = 128
26
+ group_size = 1024
27
+ reward_positive = 1.0
28
+ reward_negative_no_tool = -1.0
29
+ reward_negative_no_answer = -0.5
30
+ learning_rate = 5e-6
31
+
32
+ [compute]
33
+ gpu_type = "A10G"
34
+ gpu_count = 4
35
+
36
+ [tags]
37
+ experiment = "math_single_step"
@@ -0,0 +1,76 @@
1
+ type = "rl"
2
+
3
+ [algorithm]
4
+ type = "online"
5
+ method = "policy_gradient"
6
+ variety = "gspo"
7
+
8
+ [services]
9
+ task_url = "http://localhost:8101"
10
+
11
+ [model]
12
+ base = "Qwen/Qwen3-1.7B"
13
+
14
+ [policy]
15
+ model = "Qwen/Qwen3-1.7B"
16
+ inference_url = "https://agent-learning.onrender.com/api/inference"
17
+ max_tokens = 1028
18
+ temperature = 0.2
19
+
20
+ [data]
21
+ split = "train"
22
+ seed_start = 0
23
+ episodes_per_iteration = 1280 # 8 per group * 4 groups per batch * 2 batches per step * 20 steps
24
+ evaluation_split = "validation"
25
+ evaluation_episodes = 50
26
+
27
+ [training]
28
+ max_turns = 1
29
+ ops = ["agent", "env"]
30
+ batch_size = 2
31
+ group_size = 16
32
+ reward_positive = 1.0
33
+ reward_negative_no_tool = -1.0
34
+ reward_negative_no_answer = -0.5
35
+ learning_rate = 5e-6
36
+ log_interval = 1
37
+ weight_sync_interval = 1
38
+
39
+ [training.weight_sync]
40
+ enable = true
41
+ targets = ["policy"]
42
+
43
+ [compute]
44
+ gpu_type = "H100"
45
+ gpu_count = 4
46
+
47
+ [topology]
48
+ type = "single_node_split"
49
+ gpus_for_vllm = 2
50
+ gpus_for_training = 1
51
+ gpus_for_ref = 1
52
+ tensor_parallel = 1
53
+
54
+ [vllm]
55
+ tensor_parallel_size = 1
56
+ max_model_len = 4096
57
+
58
+ [reference]
59
+ placement = "dedicated"
60
+ port = 8002
61
+ tp = 1
62
+ health_max_wait_s = 180
63
+ health_interval_ms = 300
64
+
65
+ [rollout]
66
+ policy_name = "math-single-step"
67
+ max_turns = 1
68
+ episodes_per_batch = 32 # group_size * batch_size
69
+
70
+ [evaluation]
71
+ instances = 32
72
+ every_n_iters = 10
73
+ seeds = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
74
+
75
+ [tags]
76
+ experiment = "math_single_step_qwen17"
@@ -0,0 +1,37 @@
1
+ type = "rl"
2
+
3
+ [services]
4
+ task_url = "https://your-math-task.modal.run"
5
+
6
+ [model]
7
+ source = "ft:REPLACE_WITH_MODEL_ID"
8
+
9
+ [policy]
10
+ model = "ft:REPLACE_WITH_MODEL_ID"
11
+ inference_url = "https://your-inference-host"
12
+ max_tokens = 128
13
+ temperature = 0.0
14
+
15
+ [data]
16
+ split = "train"
17
+ seed_start = 0
18
+ episodes_per_iteration = 2048
19
+ evaluation_split = "validation"
20
+ evaluation_episodes = 256
21
+
22
+ [training]
23
+ max_turns = 1
24
+ ops = ["agent", "env"]
25
+ batch_size = 128
26
+ group_size = 1024
27
+ reward_positive = 1.0
28
+ reward_negative_no_tool = -1.0
29
+ reward_negative_no_answer = -0.5
30
+ learning_rate = 5e-6
31
+
32
+ [compute]
33
+ gpu_type = "A10G"
34
+ gpu_count = 4
35
+
36
+ [tags]
37
+ experiment = "math_single_step_from_fft"