synth-ai 0.2.16__py3-none-any.whl → 0.2.19__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of synth-ai might be problematic. Click here for more details.

Files changed (299) hide show
  1. examples/analyze_semantic_words.sh +2 -2
  2. examples/baseline/banking77_baseline.py +204 -0
  3. examples/baseline/crafter_baseline.py +407 -0
  4. examples/baseline/pokemon_red_baseline.py +326 -0
  5. examples/baseline/simple_baseline.py +56 -0
  6. examples/baseline/warming_up_to_rl_baseline.py +239 -0
  7. examples/blog_posts/gepa/README.md +355 -0
  8. examples/blog_posts/gepa/configs/banking77_gepa_local.toml +95 -0
  9. examples/blog_posts/gepa/configs/banking77_gepa_test.toml +82 -0
  10. examples/blog_posts/gepa/configs/banking77_mipro_local.toml +52 -0
  11. examples/blog_posts/gepa/configs/hotpotqa_gepa_local.toml +59 -0
  12. examples/blog_posts/gepa/configs/hotpotqa_gepa_qwen.toml +36 -0
  13. examples/blog_posts/gepa/configs/hotpotqa_mipro_local.toml +53 -0
  14. examples/blog_posts/gepa/configs/hover_gepa_local.toml +59 -0
  15. examples/blog_posts/gepa/configs/hover_gepa_qwen.toml +36 -0
  16. examples/blog_posts/gepa/configs/hover_mipro_local.toml +53 -0
  17. examples/blog_posts/gepa/configs/ifbench_gepa_local.toml +59 -0
  18. examples/blog_posts/gepa/configs/ifbench_gepa_qwen.toml +36 -0
  19. examples/blog_posts/gepa/configs/ifbench_mipro_local.toml +53 -0
  20. examples/blog_posts/gepa/configs/pupa_gepa_local.toml +60 -0
  21. examples/blog_posts/gepa/configs/pupa_mipro_local.toml +54 -0
  22. examples/blog_posts/gepa/deploy_banking77_task_app.sh +41 -0
  23. examples/blog_posts/gepa/gepa_baseline.py +204 -0
  24. examples/blog_posts/gepa/query_prompts_example.py +97 -0
  25. examples/blog_posts/gepa/run_gepa_banking77.sh +87 -0
  26. examples/blog_posts/gepa/task_apps.py +105 -0
  27. examples/blog_posts/gepa/test_gepa_local.sh +67 -0
  28. examples/blog_posts/gepa/verify_banking77_setup.sh +123 -0
  29. examples/blog_posts/pokemon_vl/README.md +98 -0
  30. examples/blog_posts/pokemon_vl/configs/eval_gpt5nano.toml +26 -0
  31. examples/blog_posts/pokemon_vl/configs/eval_qwen3_vl.toml +27 -0
  32. examples/blog_posts/pokemon_vl/configs/eval_rl_final.toml +24 -0
  33. examples/blog_posts/pokemon_vl/configs/filter_high_reward.toml +10 -0
  34. examples/blog_posts/pokemon_vl/configs/train_rl_from_sft.toml +43 -0
  35. examples/blog_posts/pokemon_vl/configs/train_sft_qwen4b_vl.toml +40 -0
  36. examples/blog_posts/pokemon_vl/extract_images.py +239 -0
  37. examples/blog_posts/pokemon_vl/pokemon_vl_baseline.py +326 -0
  38. examples/blog_posts/pokemon_vl/run_eval_extract_images.py +209 -0
  39. examples/blog_posts/pokemon_vl/run_qwen_eval_extract_images.py +212 -0
  40. examples/blog_posts/pokemon_vl/text_box_analysis.md +106 -0
  41. examples/blog_posts/warming_up_to_rl/ARCHITECTURE.md +195 -0
  42. examples/blog_posts/warming_up_to_rl/FINAL_TEST_RESULTS.md +127 -0
  43. examples/blog_posts/warming_up_to_rl/INFERENCE_SUCCESS.md +132 -0
  44. examples/blog_posts/warming_up_to_rl/README.md +158 -0
  45. examples/blog_posts/warming_up_to_rl/SMOKE_TESTING.md +164 -0
  46. examples/blog_posts/warming_up_to_rl/SMOKE_TEST_COMPLETE.md +253 -0
  47. examples/blog_posts/warming_up_to_rl/configs/eval_baseline_qwen32b_10x20.toml +25 -0
  48. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b.toml +25 -0
  49. examples/blog_posts/warming_up_to_rl/configs/eval_ft_qwen4b_10x20.toml +26 -0
  50. examples/blog_posts/warming_up_to_rl/configs/eval_groq_qwen32b.toml +25 -0
  51. examples/blog_posts/warming_up_to_rl/configs/eval_openai_gpt_oss_120b.toml +29 -0
  52. examples/blog_posts/warming_up_to_rl/configs/filter_high_reward_dataset.toml +10 -0
  53. examples/blog_posts/warming_up_to_rl/configs/smoke_test.toml +75 -0
  54. examples/blog_posts/warming_up_to_rl/configs/train_rl_from_sft.toml +91 -0
  55. examples/blog_posts/warming_up_to_rl/configs/train_sft_qwen4b.toml +40 -0
  56. examples/blog_posts/warming_up_to_rl/warming_up_to_rl_baseline.py +187 -0
  57. examples/dev/qwen3_32b_qlora_4xh100.toml +5 -0
  58. examples/multi_step/configs/VERILOG_REWARDS.md +4 -0
  59. examples/multi_step/configs/VERILOG_RL_CHECKLIST.md +4 -0
  60. examples/multi_step/configs/crafter_rl_outcome.toml +2 -1
  61. examples/multi_step/configs/crafter_rl_stepwise_hosted_judge.toml +65 -107
  62. examples/multi_step/configs/crafter_rl_stepwise_shaped.toml +2 -1
  63. examples/multi_step/configs/crafter_rl_stepwise_simple.toml +2 -1
  64. examples/multi_step/configs/crafter_rl_stepwise_simple_NEW_FORMAT.toml +105 -0
  65. examples/multi_step/configs/verilog_rl_lora.toml +80 -123
  66. examples/qwen_coder/configs/coder_lora_30b.toml +1 -3
  67. examples/qwen_coder/configs/coder_lora_4b.toml +4 -1
  68. examples/qwen_coder/configs/coder_lora_small.toml +1 -3
  69. examples/qwen_vl/README.md +10 -12
  70. examples/qwen_vl/SETUP_COMPLETE.md +7 -8
  71. examples/qwen_vl/VISION_TESTS_COMPLETE.md +2 -3
  72. examples/qwen_vl/collect_data_via_cli.md +76 -84
  73. examples/qwen_vl/collect_vision_traces.py +4 -4
  74. examples/qwen_vl/configs/crafter_rl_vision_qwen3vl4b.toml +40 -57
  75. examples/qwen_vl/configs/crafter_vlm_sft_example.toml +1 -2
  76. examples/qwen_vl/configs/eval_gpt4o_mini_vision.toml +20 -37
  77. examples/qwen_vl/configs/eval_gpt5nano_vision.toml +21 -40
  78. examples/qwen_vl/configs/eval_qwen3vl_vision.toml +26 -0
  79. examples/qwen_vl/configs/{filter_qwen2vl_sft.toml → filter_qwen3vl_sft.toml} +4 -5
  80. examples/qwen_vl/configs/filter_vision_sft.toml +2 -3
  81. examples/qwen_vl/crafter_qwen_vl_agent.py +5 -5
  82. examples/qwen_vl/run_vision_comparison.sh +6 -7
  83. examples/rl/README.md +5 -5
  84. examples/rl/configs/rl_from_base_qwen.toml +26 -1
  85. examples/rl/configs/rl_from_base_qwen17.toml +6 -2
  86. examples/rl/task_app/README.md +1 -2
  87. examples/rl/task_app/math_single_step.py +2 -2
  88. examples/run_crafter_demo.sh +2 -2
  89. examples/sft/README.md +1 -1
  90. examples/sft/configs/crafter_fft_qwen0p6b.toml +4 -1
  91. examples/sft/configs/crafter_lora_qwen0p6b.toml +4 -1
  92. examples/swe/task_app/README.md +32 -2
  93. examples/swe/task_app/grpo_swe_mini.py +4 -0
  94. examples/swe/task_app/hosted/envs/crafter/react_agent.py +1 -1
  95. examples/swe/task_app/hosted/envs/mini_swe/environment.py +37 -10
  96. examples/swe/task_app/hosted/inference/openai_client.py +4 -38
  97. examples/swe/task_app/hosted/policy_routes.py +17 -0
  98. examples/swe/task_app/hosted/rollout.py +4 -2
  99. examples/swe/task_app/morph_backend.py +178 -0
  100. examples/task_apps/banking77/__init__.py +6 -0
  101. examples/task_apps/banking77/banking77_task_app.py +841 -0
  102. examples/task_apps/banking77/deploy_wrapper.py +46 -0
  103. examples/task_apps/crafter/CREATE_SFT_DATASET.md +4 -0
  104. examples/task_apps/crafter/FILTER_COMMAND_STATUS.md +4 -0
  105. examples/task_apps/crafter/FILTER_COMMAND_SUCCESS.md +4 -0
  106. examples/task_apps/crafter/task_app/README.md +1 -1
  107. examples/task_apps/crafter/task_app/grpo_crafter.py +90 -5
  108. examples/task_apps/crafter/task_app/grpo_crafter_task_app.py +1 -1
  109. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/policy.py +4 -26
  110. examples/task_apps/crafter/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -2
  111. examples/task_apps/crafter/task_app/synth_envs_hosted/hosted_app.py +49 -0
  112. examples/task_apps/crafter/task_app/synth_envs_hosted/inference/openai_client.py +372 -107
  113. examples/task_apps/crafter/task_app/synth_envs_hosted/policy_routes.py +81 -12
  114. examples/task_apps/crafter/task_app/synth_envs_hosted/rollout.py +82 -11
  115. examples/task_apps/crafter/task_app/synth_envs_hosted/utils.py +194 -1
  116. examples/task_apps/enron/task_app/grpo_enron_task_app.py +1 -1
  117. examples/task_apps/gepa_benchmarks/__init__.py +7 -0
  118. examples/task_apps/gepa_benchmarks/common.py +260 -0
  119. examples/task_apps/gepa_benchmarks/hotpotqa_task_app.py +507 -0
  120. examples/task_apps/gepa_benchmarks/hover_task_app.py +436 -0
  121. examples/task_apps/gepa_benchmarks/ifbench_task_app.py +563 -0
  122. examples/task_apps/gepa_benchmarks/pupa_task_app.py +460 -0
  123. examples/task_apps/math/README.md +1 -2
  124. examples/task_apps/pokemon_red/README.md +3 -4
  125. examples/task_apps/pokemon_red/README_IMAGE_ONLY_EVAL.md +4 -0
  126. examples/task_apps/pokemon_red/eval_image_only_gpt4o.toml +6 -5
  127. examples/task_apps/pokemon_red/eval_pokemon_red_policy.py +1 -2
  128. examples/task_apps/pokemon_red/task_app.py +288 -39
  129. examples/task_apps/sokoban/README.md +2 -3
  130. examples/task_apps/verilog/eval_groq_qwen32b.toml +12 -14
  131. examples/task_apps/verilog/task_app/grpo_verilog_task_app.py +1 -1
  132. examples/vlm/configs/crafter_vlm_gpt4o.toml +4 -1
  133. examples/warming_up_to_rl/configs/crafter_fft.toml +4 -1
  134. examples/warming_up_to_rl/configs/crafter_fft_4b.toml +0 -2
  135. examples/warming_up_to_rl/configs/rl_from_base_qwen4b.toml +3 -2
  136. examples/warming_up_to_rl/run_local_rollout_traced.py +1 -1
  137. examples/warming_up_to_rl/task_app/README.md +1 -1
  138. examples/warming_up_to_rl/task_app/grpo_crafter.py +185 -5
  139. examples/warming_up_to_rl/task_app/grpo_crafter_task_app.py +1 -1
  140. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/policy.py +3 -27
  141. examples/warming_up_to_rl/task_app/synth_envs_hosted/envs/crafter/react_agent.py +1 -1
  142. examples/warming_up_to_rl/task_app/synth_envs_hosted/hosted_app.py +49 -0
  143. examples/warming_up_to_rl/task_app/synth_envs_hosted/inference/openai_client.py +156 -45
  144. examples/warming_up_to_rl/task_app/synth_envs_hosted/policy_routes.py +37 -4
  145. examples/warming_up_to_rl/task_app/synth_envs_hosted/rollout.py +33 -3
  146. examples/warming_up_to_rl/task_app/synth_envs_hosted/utils.py +67 -0
  147. examples/workflows/math_rl/configs/rl_from_base_qwen.toml +27 -0
  148. examples/workflows/math_rl/configs/rl_from_base_qwen17.toml +6 -0
  149. synth_ai/api/train/builders.py +99 -4
  150. synth_ai/api/train/cli.py +516 -26
  151. synth_ai/api/train/config_finder.py +13 -2
  152. synth_ai/api/train/configs/__init__.py +23 -2
  153. synth_ai/api/train/configs/prompt_learning.py +442 -0
  154. synth_ai/api/train/configs/rl.py +61 -7
  155. synth_ai/api/train/configs/sft.py +6 -2
  156. synth_ai/api/train/configs/shared.py +59 -2
  157. synth_ai/api/train/task_app.py +1 -1
  158. synth_ai/api/train/validators.py +277 -0
  159. synth_ai/auth/credentials.py +119 -0
  160. synth_ai/baseline/__init__.py +25 -0
  161. synth_ai/baseline/config.py +209 -0
  162. synth_ai/baseline/discovery.py +214 -0
  163. synth_ai/baseline/execution.py +146 -0
  164. synth_ai/cli/__init__.py +94 -18
  165. synth_ai/cli/__main__.py +0 -0
  166. synth_ai/cli/claude.py +70 -0
  167. synth_ai/cli/codex.py +84 -0
  168. synth_ai/cli/commands/__init__.py +18 -0
  169. synth_ai/cli/commands/baseline/__init__.py +12 -0
  170. synth_ai/cli/commands/baseline/core.py +637 -0
  171. synth_ai/cli/commands/baseline/list.py +93 -0
  172. synth_ai/cli/commands/demo/__init__.py +6 -0
  173. synth_ai/cli/commands/demo/core.py +163 -0
  174. synth_ai/cli/commands/eval/__init__.py +19 -0
  175. synth_ai/cli/commands/eval/core.py +1112 -0
  176. synth_ai/cli/commands/eval/errors.py +81 -0
  177. synth_ai/cli/commands/eval/validation.py +133 -0
  178. synth_ai/cli/commands/filter/__init__.py +12 -0
  179. synth_ai/cli/commands/filter/core.py +424 -0
  180. synth_ai/cli/commands/filter/errors.py +55 -0
  181. synth_ai/cli/commands/filter/validation.py +77 -0
  182. synth_ai/cli/commands/help/__init__.py +177 -0
  183. synth_ai/cli/commands/help/core.py +72 -0
  184. synth_ai/cli/commands/smoke/__init__.py +7 -0
  185. synth_ai/cli/commands/smoke/core.py +1436 -0
  186. synth_ai/cli/commands/status/__init__.py +64 -0
  187. synth_ai/cli/commands/status/client.py +192 -0
  188. synth_ai/cli/commands/status/config.py +92 -0
  189. synth_ai/cli/commands/status/errors.py +20 -0
  190. synth_ai/cli/commands/status/formatters.py +164 -0
  191. synth_ai/cli/commands/status/subcommands/__init__.py +9 -0
  192. synth_ai/cli/commands/status/subcommands/files.py +79 -0
  193. synth_ai/cli/commands/status/subcommands/jobs.py +334 -0
  194. synth_ai/cli/commands/status/subcommands/models.py +79 -0
  195. synth_ai/cli/commands/status/subcommands/pricing.py +22 -0
  196. synth_ai/cli/commands/status/subcommands/runs.py +81 -0
  197. synth_ai/cli/commands/status/subcommands/summary.py +47 -0
  198. synth_ai/cli/commands/status/subcommands/usage.py +203 -0
  199. synth_ai/cli/commands/status/utils.py +114 -0
  200. synth_ai/cli/commands/train/__init__.py +53 -0
  201. synth_ai/cli/commands/train/core.py +21 -0
  202. synth_ai/cli/commands/train/errors.py +117 -0
  203. synth_ai/cli/commands/train/judge_schemas.py +200 -0
  204. synth_ai/cli/commands/train/judge_validation.py +305 -0
  205. synth_ai/cli/commands/train/validation.py +386 -0
  206. synth_ai/cli/demo.py +30 -158
  207. synth_ai/cli/deploy/__init__.py +43 -0
  208. synth_ai/cli/deploy.py +162 -0
  209. synth_ai/cli/eval/__init__.py +36 -0
  210. synth_ai/cli/eval/core.py +5 -0
  211. synth_ai/cli/eval/errors.py +31 -0
  212. synth_ai/cli/eval/validation.py +5 -0
  213. synth_ai/cli/filter/__init__.py +28 -0
  214. synth_ai/cli/filter/core.py +5 -0
  215. synth_ai/cli/filter/errors.py +23 -0
  216. synth_ai/cli/filter/validation.py +5 -0
  217. synth_ai/cli/legacy_root_backup.py +14 -8
  218. synth_ai/cli/modal_serve/__init__.py +12 -0
  219. synth_ai/cli/modal_serve/core.py +14 -0
  220. synth_ai/cli/modal_serve/errors.py +8 -0
  221. synth_ai/cli/modal_serve/validation.py +11 -0
  222. synth_ai/cli/opencode.py +107 -0
  223. synth_ai/cli/root.py +9 -5
  224. synth_ai/cli/serve/__init__.py +12 -0
  225. synth_ai/cli/serve/core.py +14 -0
  226. synth_ai/cli/serve/errors.py +8 -0
  227. synth_ai/cli/serve/validation.py +11 -0
  228. synth_ai/cli/setup.py +20 -265
  229. synth_ai/cli/status.py +7 -126
  230. synth_ai/cli/task_app_deploy.py +1 -10
  231. synth_ai/cli/task_app_modal_serve.py +4 -9
  232. synth_ai/cli/task_app_serve.py +4 -11
  233. synth_ai/cli/task_apps.py +51 -1480
  234. synth_ai/cli/train/__init__.py +12 -0
  235. synth_ai/cli/train/core.py +21 -0
  236. synth_ai/cli/train/errors.py +8 -0
  237. synth_ai/cli/train/validation.py +24 -0
  238. synth_ai/cli/train.py +1 -14
  239. synth_ai/demos/crafter/grpo_crafter_task_app.py +1 -1
  240. synth_ai/demos/demo_task_apps/crafter/grpo_crafter_task_app.py +1 -1
  241. synth_ai/environments/examples/crafter_classic/engine_deterministic_patch.py +7 -4
  242. synth_ai/environments/examples/crafter_classic/engine_serialization_patch_v3.py +9 -5
  243. synth_ai/environments/examples/crafter_classic/world_config_patch_simple.py +4 -3
  244. synth_ai/environments/examples/red/engine.py +33 -12
  245. synth_ai/environments/examples/red/engine_helpers/reward_components.py +151 -179
  246. synth_ai/environments/examples/red/environment.py +26 -0
  247. synth_ai/environments/examples/red/trace_hooks_v3.py +168 -0
  248. synth_ai/http.py +12 -0
  249. synth_ai/judge_schemas.py +10 -10
  250. synth_ai/learning/__init__.py +10 -0
  251. synth_ai/learning/prompt_learning_client.py +276 -0
  252. synth_ai/learning/prompt_learning_types.py +184 -0
  253. synth_ai/learning/rl/client.py +3 -1
  254. synth_ai/pricing/__init__.py +2 -0
  255. synth_ai/pricing/model_pricing.py +57 -0
  256. synth_ai/streaming/__init__.py +29 -0
  257. synth_ai/streaming/config.py +94 -0
  258. synth_ai/streaming/handlers.py +518 -0
  259. synth_ai/streaming/streamer.py +320 -0
  260. synth_ai/streaming/types.py +95 -0
  261. synth_ai/task/apps/__init__.py +1 -0
  262. synth_ai/task/config.py +2 -0
  263. synth_ai/task/tracing_utils.py +25 -25
  264. synth_ai/task/validators.py +45 -9
  265. synth_ai/task_app_cfgs.py +21 -0
  266. synth_ai/tracing_v3/config.py +162 -19
  267. synth_ai/tracing_v3/constants.py +1 -1
  268. synth_ai/tracing_v3/db_config.py +24 -38
  269. synth_ai/tracing_v3/migration_helper.py +1 -2
  270. synth_ai/tracing_v3/storage/config.py +47 -13
  271. synth_ai/tracing_v3/storage/factory.py +3 -3
  272. synth_ai/tracing_v3/turso/daemon.py +113 -11
  273. synth_ai/tracing_v3/turso/native_manager.py +92 -16
  274. synth_ai/types.py +8 -0
  275. synth_ai/urls.py +11 -0
  276. synth_ai/utils/__init__.py +30 -1
  277. synth_ai/utils/agents.py +74 -0
  278. synth_ai/utils/bin.py +39 -0
  279. synth_ai/utils/cli.py +149 -5
  280. synth_ai/utils/env.py +40 -33
  281. synth_ai/utils/http.py +4 -1
  282. synth_ai/utils/json.py +72 -0
  283. synth_ai/utils/modal.py +285 -3
  284. synth_ai/utils/paths.py +48 -0
  285. synth_ai/utils/uvicorn.py +113 -0
  286. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/METADATA +109 -6
  287. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/RECORD +291 -142
  288. examples/qwen_vl/configs/eval_qwen2vl_vision.toml +0 -44
  289. synth_ai/cli/tui.py +0 -62
  290. synth_ai/tui/__init__.py +0 -5
  291. synth_ai/tui/__main__.py +0 -13
  292. synth_ai/tui/cli/__init__.py +0 -1
  293. synth_ai/tui/cli/query_experiments.py +0 -164
  294. synth_ai/tui/cli/query_experiments_v3.py +0 -164
  295. synth_ai/tui/dashboard.py +0 -911
  296. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/WHEEL +0 -0
  297. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/entry_points.txt +0 -0
  298. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/licenses/LICENSE +0 -0
  299. {synth_ai-0.2.16.dist-info → synth_ai-0.2.19.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,386 @@
1
+ """TOML validation logic for train commands (SFT and RL)."""
2
+
3
+ from collections.abc import MutableMapping
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ from pydantic import ValidationError
8
+ from synth_ai.api.train.configs.rl import RLConfig
9
+ from synth_ai.api.train.configs.sft import SFTConfig
10
+ from synth_ai.api.train.utils import load_toml
11
+
12
+ from .errors import (
13
+ InvalidJudgeConfigError,
14
+ InvalidRLConfigError,
15
+ InvalidRubricConfigError,
16
+ InvalidSFTConfigError,
17
+ MissingAlgorithmError,
18
+ MissingComputeError,
19
+ MissingDatasetError,
20
+ MissingModelError,
21
+ TomlParseError,
22
+ UnsupportedAlgorithmError,
23
+ )
24
+ from .judge_validation import extract_and_validate_judge_rubric
25
+
26
+ __all__ = [
27
+ "validate_sft_config",
28
+ "validate_rl_config",
29
+ "load_and_validate_sft",
30
+ "load_and_validate_rl",
31
+ ]
32
+
33
+
34
def validate_sft_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
    """Check a raw SFT TOML mapping and return its validated dictionary form.

    Cheap structural checks run first so the user gets a targeted error:
    ``[algorithm]`` and ``[job]`` must be present and non-empty, the job must
    name a model and a dataset, the algorithm type/method/variety must be
    acceptable, and ``[compute]`` must carry ``gpu_type`` and ``gpu_count``.
    A config that passes all of them is then handed to
    ``SFTConfig.from_mapping``.

    Args:
        config: Raw configuration dictionary from TOML

    Returns:
        Validated configuration dictionary (the result of ``to_dict()`` on
        the parsed ``SFTConfig``)

    Raises:
        InvalidSFTConfigError: If ``[job]`` is missing or Pydantic validation fails
        MissingAlgorithmError: If ``[algorithm]`` or its ``variety`` is missing
        MissingModelError: If ``[job].model`` is not specified
        MissingDatasetError: If neither ``[job].data`` nor ``[job].data_path`` is set
        MissingComputeError: If ``[compute]``, ``gpu_type`` or ``gpu_count`` is missing
        UnsupportedAlgorithmError: If algorithm type or method is not an SFT one
    """
    # --- required top-level sections -------------------------------------
    if not config.get("algorithm"):
        raise MissingAlgorithmError(
            detail="[algorithm] section is required for SFT configs"
        )
    if not config.get("job"):
        raise InvalidSFTConfigError(
            detail="[job] section is required for SFT configs"
        )

    # --- [job]: model and dataset -----------------------------------------
    job_section = config.get("job", {})
    if not job_section.get("model"):
        raise MissingModelError(
            detail="[job].model is required (e.g., 'Qwen/Qwen3-4B')"
        )
    has_dataset = job_section.get("data") or job_section.get("data_path")
    if not has_dataset:
        raise MissingDatasetError(
            detail="[job].data or [job].data_path must be specified",
            hint="Provide path to training JSONL file"
        )

    # --- [algorithm]: type, method, variety -------------------------------
    algo_section = config.get("algorithm", {})
    allowed_types = {"offline", None}  # an omitted type is tolerated
    if algo_section.get("type") not in allowed_types:
        raise UnsupportedAlgorithmError(
            algorithm_type=algo_section.get("type", "unknown"),
            expected="offline",
            hint="SFT requires algorithm.type = 'offline'"
        )

    allowed_methods = {"sft", "supervised_finetune"}
    chosen_method = algo_section.get("method", "")
    # An empty/omitted method is accepted; only a wrong one is rejected.
    if chosen_method and chosen_method not in allowed_methods:
        raise UnsupportedAlgorithmError(
            algorithm_type=chosen_method,
            expected="sft or supervised_finetune",
            hint="SFT requires algorithm.method = 'sft' or 'supervised_finetune'"
        )

    if not algo_section.get("variety"):
        raise MissingAlgorithmError(
            detail="[algorithm].variety is required (e.g., 'fft', 'lora', 'qlora')"
        )

    # --- [compute] ---------------------------------------------------------
    compute_section = config.get("compute", {})
    if not compute_section:
        raise MissingComputeError(
            detail="[compute] section is required",
            hint="Specify gpu_type, gpu_count, and nodes"
        )
    if not compute_section.get("gpu_type"):
        raise MissingComputeError(
            detail="[compute].gpu_type is required (e.g., 'H100', 'A100')"
        )
    if not compute_section.get("gpu_count"):
        raise MissingComputeError(
            detail="[compute].gpu_count is required"
        )

    # --- full schema validation --------------------------------------------
    try:
        return SFTConfig.from_mapping(config).to_dict()
    except ValidationError as exc:
        # One bullet per Pydantic error: dotted location, then the message.
        bullets = "\n".join(
            " • {}: {}".format(".".join(str(x) for x in issue["loc"]), issue["msg"])
            for issue in exc.errors()
        )
        raise InvalidSFTConfigError(
            detail="Pydantic validation failed:\n" + bullets
        ) from exc
128
+
129
+
130
def _require_rl_model_fields(section_name: str, section: Any, base_key: str) -> None:
    """Raise unless *section* names a base model or source, a trainer mode and a label."""
    if not section.get(base_key) and not section.get("source"):
        raise MissingModelError(
            detail=f"[{section_name}].{base_key} or [{section_name}].source must be specified",
            hint="Provide base model (e.g., 'Qwen/Qwen3-4B') or source checkpoint",
        )
    if not section.get("trainer_mode"):
        raise InvalidRLConfigError(
            detail=f"[{section_name}].trainer_mode is required (e.g., 'full', 'lora')"
        )
    if not section.get("label"):
        raise InvalidRLConfigError(
            detail=f"[{section_name}].label is required (e.g., 'my-rl-model')",
            hint="Provide a descriptive label for this model",
        )


def _require_rl_section_fields(
    section_name: str, section: Any, required: dict[str, str]
) -> None:
    """Raise InvalidRLConfigError for the first key of *required* absent from *section*."""
    for field, description in required.items():
        if field not in section:
            raise InvalidRLConfigError(
                detail=f"[{section_name}].{field} is required ({description})",
                hint=f"Add {field} to the [{section_name}] section",
            )


def validate_rl_config(config: MutableMapping[str, Any]) -> dict[str, Any]:
    """Validate RL configuration from TOML.

    Structural checks run first (algorithm, policy/model, compute, rollout,
    topology, then the optional training and evaluation sections), followed
    by judge/rubric validation and finally ``RLConfig.from_mapping``.

    Note:
        ``config`` is modified in place. A placeholder ``[services]`` table is
        added when none exists, and ``compute.topology.reference_placement``
        is set to ``"none"`` when absent.

    Args:
        config: Raw configuration dictionary from TOML

    Returns:
        Validated configuration dictionary

    Raises:
        InvalidRLConfigError: If validation fails
        MissingAlgorithmError: If algorithm section is missing or invalid
        MissingModelError: If model is not specified
        MissingComputeError: If compute section is missing required fields
        UnsupportedAlgorithmError: If algorithm type or method is not an RL one
    """
    # Check for required top-level sections
    if not config.get("algorithm"):
        raise MissingAlgorithmError(
            detail="[algorithm] section is required for RL configs"
        )

    # Check for model OR policy (policy is the new format)
    if "policy" not in config and "model" not in config:
        raise MissingModelError(
            detail="[policy] or [model] section is required for RL configs"
        )

    # Validate algorithm type, method, and variety
    algorithm = config.get("algorithm", {})
    if algorithm.get("type") not in ("online", None):
        raise UnsupportedAlgorithmError(
            algorithm_type=algorithm.get("type", "unknown"),
            expected="online",
            hint="RL requires algorithm.type = 'online'",
        )

    supported_methods = ("policy_gradient", "ppo", "gspo")
    method = algorithm.get("method", "")
    if method and method not in supported_methods:
        # Report every accepted method, not just the first one, so the
        # message matches what this check actually allows.
        raise UnsupportedAlgorithmError(
            algorithm_type=method,
            expected="policy_gradient, ppo, or gspo",
            hint=(
                "RL requires algorithm.method to be one of "
                "'policy_gradient', 'ppo', or 'gspo'"
            ),
        )

    if not algorithm.get("variety"):
        raise MissingAlgorithmError(
            detail="[algorithm].variety is required (e.g., 'gspo', 'ppo')"
        )

    # Validate model/policy section: use policy if available, otherwise
    # fall back to the legacy model section.
    model = config.get("model", {})
    policy = config.get("policy", {})
    if policy:
        _require_rl_model_fields("policy", policy, "model_name")
    elif model:
        _require_rl_model_fields("model", model, "base")

    # Validate compute section
    compute = config.get("compute", {})
    if not compute:
        raise MissingComputeError(
            detail="[compute] section is required",
            hint="Specify gpu_type and gpu_count",
        )
    if not compute.get("gpu_type"):
        raise MissingComputeError(
            detail="[compute].gpu_type is required (e.g., 'H100', 'A100')"
        )
    if not compute.get("gpu_count"):
        raise MissingComputeError(
            detail="[compute].gpu_count is required"
        )

    # Check for rollout configuration
    rollout = config.get("rollout", {})
    if not rollout:
        raise InvalidRLConfigError(
            detail="[rollout] section is required for RL configs",
            hint="Specify env_name, policy_name, max_turns, etc.",
        )
    if not rollout.get("env_name"):
        raise InvalidRLConfigError(
            detail="[rollout].env_name is required (e.g., 'math', 'crafter')"
        )
    if not rollout.get("policy_name"):
        raise InvalidRLConfigError(
            detail="[rollout].policy_name is required"
        )

    # Validate topology section (can be top-level or under compute)
    topology = config.get("topology") or compute.get("topology", {})
    if not topology:
        raise InvalidRLConfigError(
            detail="[topology] or [compute.topology] section is required",
            hint="Specify gpus_for_vllm, gpus_for_training, etc.",
        )

    # [training] and [evaluation] are optional, but when present they must
    # be complete.
    optional_sections = (
        (
            "training",
            {
                "num_epochs": "number of training epochs",
                "iterations_per_epoch": "iterations per epoch",
                "max_turns": "maximum turns",
                "batch_size": "batch size",
                "group_size": "group size",
                "learning_rate": "learning rate",
            },
        ),
        (
            "evaluation",
            {
                "instances": "number of evaluation instances",
                "every_n_iters": "evaluation frequency",
                "seeds": "evaluation seeds",
            },
        ),
    )
    for section_name, required in optional_sections:
        section = config.get(section_name, {})
        if section:
            _require_rl_section_fields(section_name, section, required)

    # Inject services section if not present (will be populated at runtime)
    if "services" not in config:
        config["services"] = {
            "task_url": "placeholder",  # Will be resolved at runtime
        }

    # Inject reference placement if not present (like builders.py does).
    # Reference is now under compute.topology.reference_placement. The
    # [compute] table is known to exist and be non-empty at this point, so
    # only the nested keys need defaulting.
    compute.setdefault("topology", {}).setdefault("reference_placement", "none")

    # Validate judge/rubric configuration with formalized Pydantic models.
    # Only the raised errors matter here; the returned models are not used.
    try:
        extract_and_validate_judge_rubric(config)
    except (InvalidJudgeConfigError, InvalidRubricConfigError) as exc:
        raise InvalidRLConfigError(
            detail=f"Judge/Rubric validation failed: {exc.detail}",
            hint="Check JUDGE_RUBRIC_CLEANUP_GUIDE.md for migration help.",
        ) from exc

    # Validate using Pydantic model
    try:
        validated = RLConfig.from_mapping(config)
        return validated.to_dict()
    except ValidationError as exc:
        errors = []
        for error in exc.errors():
            loc = ".".join(str(x) for x in error["loc"])
            errors.append(f" • {loc}: {error['msg']}")
        raise InvalidRLConfigError(
            detail="Pydantic validation failed:\n" + "\n".join(errors)
        ) from exc
339
+
340
+
341
def load_and_validate_sft(config_path: Path) -> dict[str, Any]:
    """Read the TOML file at ``config_path`` and run SFT validation on it.

    Args:
        config_path: Path to TOML configuration file

    Returns:
        Validated configuration dictionary, as returned by
        ``validate_sft_config``

    Raises:
        TomlParseError: If ``load_toml`` raises for any reason; the original
            exception is chained as the cause
        InvalidSFTConfigError: If validation fails
    """
    try:
        parsed = load_toml(config_path)
    except Exception as load_failure:
        # Any loader failure is reported against the offending path.
        raise TomlParseError(
            path=str(config_path),
            detail=str(load_failure)
        ) from load_failure
    else:
        return validate_sft_config(parsed)
363
+
364
+
365
def load_and_validate_rl(config_path: Path) -> dict[str, Any]:
    """Read the TOML file at ``config_path`` and run RL validation on it.

    Args:
        config_path: Path to TOML configuration file

    Returns:
        Validated configuration dictionary, as returned by
        ``validate_rl_config``

    Raises:
        TomlParseError: If ``load_toml`` raises for any reason; the original
            exception is chained as the cause
        InvalidRLConfigError: If validation fails
    """
    try:
        parsed = load_toml(config_path)
    except Exception as load_failure:
        # Any loader failure is reported against the offending path.
        raise TomlParseError(
            path=str(config_path),
            detail=str(load_failure)
        ) from load_failure
    else:
        return validate_rl_config(parsed)
synth_ai/cli/demo.py CHANGED
@@ -1,165 +1,37 @@
1
- #!/usr/bin/env python3
2
- """
3
- CLI: interactive launcher for example demos and RL demo helpers.
4
-
5
- - `synth-ai demo` (no subcommand) -> initialize RL demo files into ./synth_demo/
6
- - `synth-ai demo deploy|configure|run` -> invoke RL demo helpers directly.
7
- """
8
-
9
- from __future__ import annotations
10
-
11
- import importlib
12
- import os
13
- import subprocess
1
+ import shutil
14
2
  from pathlib import Path
15
- from typing import Any, cast
16
3
 
17
4
  import click
18
- from click.exceptions import Exit
19
-
20
- demo_commands = cast(
21
- Any, importlib.import_module("synth_ai.demos.core.cli")
22
- )
23
-
24
-
25
- def _find_demo_scripts(root: Path) -> list[Path]:
26
- if not root.exists():
27
- return []
28
- return sorted([p for p in root.rglob("run_demo.sh") if p.is_file()])
29
-
30
-
31
- def _run_demo_command(func, *args, **kwargs) -> None:
32
- """Invoke a demo command and exit via Click on non-zero status codes."""
33
-
34
- try:
35
- result = func(*args, **kwargs)
36
- except SystemExit as exc: # pragma: no cover - defensive
37
- raise Exit(exc.code or 1) from exc
38
-
39
- if result is None:
40
- return
41
-
42
- try:
43
- code = int(result)
44
- except (TypeError, ValueError):
45
- return
46
- if code != 0:
47
- raise Exit(code)
48
-
49
-
50
- def register(cli):
51
- @cli.group("demo", invoke_without_command=True)
52
- @click.option(
53
- "--force", is_flag=True, help="Overwrite existing files in CWD when initializing demo"
54
- )
55
- @click.option("--list", "list_only", is_flag=True, help="List available legacy demos and exit")
56
- @click.option("-f", "filter_term", default="", help="Filter legacy demos by substring")
57
- @click.pass_context
58
- def demo(ctx: click.Context, force: bool, list_only: bool, filter_term: str):
59
- """Demo helpers.
60
5
 
61
- - Default (no subcommand): initialize RL demo files into ./synth_demo/ (alias of rl_demo init)
62
- - Legacy mode: with --list, find and run examples/*/run_demo.sh
63
- - New RL demo subcommands: deploy, configure, run
64
- """
65
- if ctx.invoked_subcommand is not None:
66
- return
67
6
 
68
- # If explicitly asked to list legacy demos, show interactive picker
69
- if list_only:
70
- repo_root = Path(os.getcwd())
71
- examples_dir = repo_root / "examples"
72
- demos = _find_demo_scripts(examples_dir)
73
- if filter_term:
74
- demos = [p for p in demos if filter_term.lower() in str(p).lower()]
7
+ DEMO_SOURCES: dict[str, str] = {
8
+ "local": "crafter",
9
+ "modal": "math"
10
+ }
75
11
 
76
- if not demos:
77
- click.echo("No run_demo.sh scripts found under examples/.")
78
- return
79
12
 
80
- click.echo("Available demos:")
81
- for idx, p in enumerate(demos, start=1):
82
- click.echo(f" {idx}. {p.relative_to(repo_root)}")
83
- click.echo("")
84
-
85
- def _validate_choice(val: str) -> int:
86
- try:
87
- i = int(val)
88
- except Exception as err:
89
- raise click.BadParameter("Enter a number from the list") from err
90
- if i < 1 or i > len(demos):
91
- raise click.BadParameter(f"Choose a number between 1 and {len(demos)}")
92
- return i
93
-
94
- choice = click.prompt("Select a demo to run", value_proc=_validate_choice)
95
- script = demos[choice - 1]
96
-
97
- click.echo("")
98
- click.echo(f"šŸš€ Running {script.relative_to(repo_root)}\n")
99
-
100
- try:
101
- subprocess.run(["bash", str(script)], check=True)
102
- except subprocess.CalledProcessError as e:
103
- click.echo(f"āŒ Demo exited with non-zero status: {e.returncode}")
104
- except KeyboardInterrupt:
105
- click.echo("\nšŸ›‘ Demo interrupted by user")
106
- return
107
-
108
- # Default: initialize RL demo files via new command
109
- _run_demo_command(demo_commands.init, force=force)
110
-
111
- # (prepare command removed; configure now prepares baseline TOML)
112
-
113
- # Help pyright understand dynamic Click group attributes
114
- _dg = cast(Any, demo)
115
-
116
- @_dg.command("deploy")
117
- @click.option("--local", is_flag=True, help="Run local FastAPI instead of Modal deploy")
118
- @click.option(
119
- "--app",
120
- type=click.Path(),
121
- default=None,
122
- help="Path to Modal app.py for uv run modal deploy",
123
- )
124
- @click.option("--name", type=str, default="synth-math-demo", help="Modal app name")
125
- @click.option(
126
- "--script",
127
- type=click.Path(),
128
- default=None,
129
- help="Path to deploy_task_app.sh (optional legacy)",
130
- )
131
- def demo_deploy(local: bool, app: str | None, name: str, script: str | None):
132
- _run_demo_command(
133
- demo_commands.deploy,
134
- local=local,
135
- app=app,
136
- name=name,
137
- script=script,
138
- )
139
-
140
- @_dg.command("configure")
141
- def demo_configure():
142
- _run_demo_command(demo_commands.run)
143
-
144
- @_dg.command("setup")
145
- def demo_setup():
146
- _run_demo_command(demo_commands.setup)
147
-
148
- @_dg.command("run")
149
- @click.option("--batch-size", type=int, default=None)
150
- @click.option("--group-size", type=int, default=None)
151
- @click.option("--model", type=str, default=None)
152
- @click.option("--timeout", type=int, default=600)
153
- def demo_run(batch_size: int | None, group_size: int | None, model: str | None, timeout: int):
154
- _run_demo_command(
155
- demo_commands.run,
156
- batch_size=batch_size,
157
- group_size=group_size,
158
- model=model,
159
- timeout=timeout,
160
- )
161
-
162
- @cli.command("setup")
163
- def setup_alias():
164
- """Perform SDK handshake and write keys to .env."""
165
- _run_demo_command(demo_commands.setup)
13
@click.command()
@click.option(
    "--runtime",
    "runtime",
    type=click.Choice(tuple(DEMO_SOURCES.keys()), case_sensitive=False),
    default="local",
    show_default=True,
    help="Select runtime to load a demo task app to your cwd. Options: local, modal"
)
def demo_cmd(runtime: str) -> None:
    # Copies the demo directory mapped to ``runtime`` in DEMO_SOURCES from the
    # installed package's ``demos`` folder into the current working directory.
    # Kept as comments rather than a docstring: click would surface a
    # docstring as this command's --help text.
    demo_name = DEMO_SOURCES[runtime.lower()]

    # parents[1] is the directory two levels above this file.
    source_dir = Path(__file__).resolve().parents[1].joinpath("demos", demo_name)
    if not source_dir.exists():
        raise click.ClickException(f"Demo source directory not found: {source_dir}")

    # Refuse to overwrite: the user must remove an existing copy first.
    target_dir = Path.cwd().joinpath(source_dir.name)
    if target_dir.exists():
        message = f"Destination already exists: {target_dir}. Remove it first if you want to re-copy."
        raise click.ClickException(message)

    shutil.copytree(source_dir, target_dir)
    click.echo(f"Copied {demo_name} demo to {target_dir}")
@@ -0,0 +1,43 @@
1
+ """Deploy command package - imports from deploy.py module."""
2
from __future__ import annotations

# This package exists for backwards compatibility. It shares its import name
# with the sibling ``deploy.py`` module file, so that file is loaded by path
# under an alias (via importlib) instead of through a normal import.
import importlib
import importlib.util
import logging
import sys
from pathlib import Path
from typing import TYPE_CHECKING

if TYPE_CHECKING:
    from click import Command

_logger = logging.getLogger(__name__)

# Alias under which the file-based deploy module is registered in sys.modules.
_DEPLOY_MODULE_NAME = "synth_ai.cli.deploy_module"

# Stub values; they stay in place whenever deploy.py is missing or fails to load.
command: Command | None = None
deploy_cmd: Command | None = None
get_command: None = None  # Not used in current implementation
__all__: list[str] = []

try:
    # Import the deploy.py module directly by file path to avoid package/module conflict
    deploy_module_path = Path(__file__).parent.parent / "deploy.py"
    if not deploy_module_path.exists():
        raise ImportError("deploy.py not found")

    spec = importlib.util.spec_from_file_location(_DEPLOY_MODULE_NAME, deploy_module_path)
    if not (spec and spec.loader):
        raise ImportError("Could not load deploy.py")

    deploy_module = importlib.util.module_from_spec(spec)
    # Register before executing so imports made while deploy.py runs can
    # resolve the alias.
    sys.modules[_DEPLOY_MODULE_NAME] = deploy_module
    try:
        spec.loader.exec_module(deploy_module)
    except BaseException:
        # Do not leave a half-initialised module registered after a failure.
        sys.modules.pop(_DEPLOY_MODULE_NAME, None)
        raise

    command = getattr(deploy_module, "deploy_cmd", None)
    deploy_cmd = command
    __all__ = [
        "command",
        "deploy_cmd",
    ]
except Exception:
    # Best effort: if deploy.py doesn't exist or fails to import, keep the
    # stubs defined above, but record why so the failure can be diagnosed.
    _logger.debug("Falling back to stub deploy command", exc_info=True)