soup-cli 0.53.2__tar.gz → 0.53.3__tar.gz

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (522) [hide / show]
  1. {soup_cli-0.53.2 → soup_cli-0.53.3}/CONTRIBUTING.md +1 -1
  2. {soup_cli-0.53.2 → soup_cli-0.53.3}/PKG-INFO +7 -8
  3. {soup_cli-0.53.2 → soup_cli-0.53.3}/README.md +7 -8
  4. {soup_cli-0.53.2 → soup_cli-0.53.3}/SECURITY.md +4 -1
  5. {soup_cli-0.53.2 → soup_cli-0.53.3}/pyproject.toml +1 -1
  6. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/__init__.py +1 -1
  7. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/config/schema.py +30 -0
  8. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/grpo.py +27 -1
  9. soup_cli-0.53.3/soup_cli/utils/prm.py +175 -0
  10. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0500_part_e.py +7 -3
  11. soup_cli-0.53.3/tests/test_v0533.py +375 -0
  12. soup_cli-0.53.2/soup_cli/utils/prm.py +0 -100
  13. {soup_cli-0.53.2 → soup_cli-0.53.3}/.dockerignore +0 -0
  14. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/FUNDING.yml +0 -0
  15. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  16. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  17. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  18. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/pull_request_template.md +0 -0
  19. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/workflows/ci.yml +0 -0
  20. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/workflows/docker.yml +0 -0
  21. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/workflows/publish.yml +0 -0
  22. {soup_cli-0.53.2 → soup_cli-0.53.3}/.github/workflows/recipe-validation.yml +0 -0
  23. {soup_cli-0.53.2 → soup_cli-0.53.3}/.gitignore +0 -0
  24. {soup_cli-0.53.2 → soup_cli-0.53.3}/CODEOWNERS +0 -0
  25. {soup_cli-0.53.2 → soup_cli-0.53.3}/CODE_OF_CONDUCT.md +0 -0
  26. {soup_cli-0.53.2 → soup_cli-0.53.3}/Dockerfile +0 -0
  27. {soup_cli-0.53.2 → soup_cli-0.53.3}/LICENSE +0 -0
  28. {soup_cli-0.53.2 → soup_cli-0.53.3}/NOTICE +0 -0
  29. {soup_cli-0.53.2 → soup_cli-0.53.3}/docker-compose.yml +0 -0
  30. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/README.md +0 -0
  31. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/dpo_chat.yaml +0 -0
  32. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/dpo_example.yaml +0 -0
  33. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/grpo_reasoning.yaml +0 -0
  34. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/rlhf_step1_sft.yaml +0 -0
  35. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/rlhf_step2_reward.yaml +0 -0
  36. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/rlhf_step3_ppo.yaml +0 -0
  37. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/sft_basic.yaml +0 -0
  38. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/configs/vision_llama.yaml +0 -0
  39. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/data/alpaca_tiny.jsonl +0 -0
  40. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/data/chat_preferences.jsonl +0 -0
  41. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/data/dpo_sample.jsonl +0 -0
  42. {soup_cli-0.53.2 → soup_cli-0.53.3}/examples/data/reasoning_math.jsonl +0 -0
  43. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup.png +0 -0
  44. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/__main__.py +0 -0
  45. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/autopilot/__init__.py +0 -0
  46. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/autopilot/analyzer.py +0 -0
  47. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/autopilot/decisions.py +0 -0
  48. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/autopilot/generate_config.py +0 -0
  49. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/__init__.py +0 -0
  50. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/pack.py +0 -0
  51. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/publish.py +0 -0
  52. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/run.py +0 -0
  53. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/schema.py +0 -0
  54. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/unpack.py +0 -0
  55. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cans/verify.py +0 -0
  56. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/cli.py +0 -0
  57. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/__init__.py +0 -0
  58. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/adapters.py +0 -0
  59. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/agent.py +0 -0
  60. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/autopilot.py +0 -0
  61. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/bench.py +0 -0
  62. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/can.py +0 -0
  63. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/chat.py +0 -0
  64. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/cost.py +0 -0
  65. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/data.py +0 -0
  66. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/data_forge.py +0 -0
  67. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/data_mix.py +0 -0
  68. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/data_score.py +0 -0
  69. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/delinearize_llama4.py +0 -0
  70. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/deploy.py +0 -0
  71. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/diff.py +0 -0
  72. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/doctor.py +0 -0
  73. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/eval.py +0 -0
  74. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/export.py +0 -0
  75. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/fetch.py +0 -0
  76. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/generate.py +0 -0
  77. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/history.py +0 -0
  78. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/infer.py +0 -0
  79. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/init.py +0 -0
  80. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/llama.py +0 -0
  81. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/merge.py +0 -0
  82. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/merge_sharded_fsdp_weights.py +0 -0
  83. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/migrate.py +0 -0
  84. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/monitor.py +0 -0
  85. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/plugins.py +0 -0
  86. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/profile.py +0 -0
  87. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/push.py +0 -0
  88. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/quantize.py +0 -0
  89. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/quickstart.py +0 -0
  90. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/recipes.py +0 -0
  91. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/registry.py +0 -0
  92. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/runs.py +0 -0
  93. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/serve.py +0 -0
  94. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/sweep.py +0 -0
  95. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/train.py +0 -0
  96. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/tui.py +0 -0
  97. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/ui.py +0 -0
  98. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/commands/why.py +0 -0
  99. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/config/__init__.py +0 -0
  100. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/config/loader.py +0 -0
  101. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/__init__.py +0 -0
  102. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/augment.py +0 -0
  103. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/chat_templates.py +0 -0
  104. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/collators.py +0 -0
  105. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/formats.py +0 -0
  106. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/loader.py +0 -0
  107. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/loss_mask.py +0 -0
  108. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/providers/__init__.py +0 -0
  109. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/providers/_utils.py +0 -0
  110. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/providers/anthropic.py +0 -0
  111. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/providers/ollama.py +0 -0
  112. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/providers/vllm.py +0 -0
  113. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/sft_format.py +0 -0
  114. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/__init__.py +0 -0
  115. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/code.py +0 -0
  116. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/conversation.py +0 -0
  117. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/preference.py +0 -0
  118. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/qa.py +0 -0
  119. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/reasoning.py +0 -0
  120. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/tool_calling.py +0 -0
  121. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/templates/verifiable.py +0 -0
  122. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/traces/__init__.py +0 -0
  123. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/traces/pair_builder.py +0 -0
  124. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/traces/parsers.py +0 -0
  125. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/traces/quality.py +0 -0
  126. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/data/validator.py +0 -0
  127. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/__init__.py +0 -0
  128. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/arena.py +0 -0
  129. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/benchmarks_v0_43.py +0 -0
  130. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/calibrate.py +0 -0
  131. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/checkpoint_intelligence.py +0 -0
  132. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/custom.py +0 -0
  133. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/forgetting.py +0 -0
  134. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/gate.py +0 -0
  135. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/human.py +0 -0
  136. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/judge.py +0 -0
  137. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/leaderboard.py +0 -0
  138. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/eval/quant_check.py +0 -0
  139. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/experiment/__init__.py +0 -0
  140. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/experiment/tracker.py +0 -0
  141. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/migrate/__init__.py +0 -0
  142. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/migrate/axolotl.py +0 -0
  143. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/migrate/common.py +0 -0
  144. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/migrate/llamafactory.py +0 -0
  145. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/migrate/unsloth.py +0 -0
  146. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/monitoring/__init__.py +0 -0
  147. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/monitoring/callback.py +0 -0
  148. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/monitoring/display.py +0 -0
  149. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/monitoring/hf_push.py +0 -0
  150. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/monitoring/trace_logger.py +0 -0
  151. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/plugins/__init__.py +0 -0
  152. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/recipes/__init__.py +0 -0
  153. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/recipes/catalog.py +0 -0
  154. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/registry/__init__.py +0 -0
  155. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/registry/attach.py +0 -0
  156. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/registry/diff.py +0 -0
  157. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/registry/hashing.py +0 -0
  158. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/registry/store.py +0 -0
  159. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/__init__.py +0 -0
  160. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/audio.yaml +0 -0
  161. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/bco.yaml +0 -0
  162. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/chat.yaml +0 -0
  163. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/code.yaml +0 -0
  164. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/embedding.yaml +0 -0
  165. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/fetch_examples/llama-3.1-8b-lora.yaml +0 -0
  166. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/fetch_examples/qwen2.5-7b-dpo.yaml +0 -0
  167. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/fetch_examples/zero3-cpu-offload.json +0 -0
  168. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/ipo.yaml +0 -0
  169. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/kto.yaml +0 -0
  170. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/longcontext.yaml +0 -0
  171. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/manifest.json +0 -0
  172. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/medical.yaml +0 -0
  173. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/moe.yaml +0 -0
  174. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/orpo.yaml +0 -0
  175. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/pretrain.yaml +0 -0
  176. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/reasoning.yaml +0 -0
  177. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/rlhf.yaml +0 -0
  178. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/simpo.yaml +0 -0
  179. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/tool-calling.yaml +0 -0
  180. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/templates/vision.yaml +0 -0
  181. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/__init__.py +0 -0
  182. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/bco.py +0 -0
  183. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/classifier.py +0 -0
  184. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/distill.py +0 -0
  185. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/dpo.py +0 -0
  186. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/embedding.py +0 -0
  187. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/ipo.py +0 -0
  188. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/kto.py +0 -0
  189. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/mlx_dpo.py +0 -0
  190. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/mlx_grpo.py +0 -0
  191. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/mlx_routing.py +0 -0
  192. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/mlx_sft.py +0 -0
  193. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/orpo.py +0 -0
  194. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/ppo.py +0 -0
  195. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/preference.py +0 -0
  196. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/pretrain.py +0 -0
  197. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/reward_model.py +0 -0
  198. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/rewards.py +0 -0
  199. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/sft.py +0 -0
  200. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/trainer/simpo.py +0 -0
  201. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/tui_app.py +0 -0
  202. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/__init__.py +0 -0
  203. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/app.py +0 -0
  204. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/plugins/__init__.py +0 -0
  205. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/static/app.js +0 -0
  206. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/static/index.html +0 -0
  207. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/static/logo.png +0 -0
  208. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/static/logo.svg +0 -0
  209. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/ui/static/style.css +0 -0
  210. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/__init__.py +0 -0
  211. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/activation_offload.py +0 -0
  212. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/advanced_precision.py +0 -0
  213. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/agent_forge.py +0 -0
  214. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/agent_rollout.py +0 -0
  215. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/anthropic_messages.py +0 -0
  216. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/auto_quant.py +0 -0
  217. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/batch_probe.py +0 -0
  218. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/bitnet.py +0 -0
  219. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/block_expansion.py +0 -0
  220. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/checkpoint_trigger.py +0 -0
  221. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/classifier.py +0 -0
  222. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/constants.py +0 -0
  223. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/convergence.py +0 -0
  224. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/crash.py +0 -0
  225. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/cross_doc_attn.py +0 -0
  226. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/curriculum.py +0 -0
  227. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/curriculum_dynamic.py +0 -0
  228. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/cut_ce.py +0 -0
  229. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/data_forge.py +0 -0
  230. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/data_mix.py +0 -0
  231. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/data_pipeline.py +0 -0
  232. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/data_score.py +0 -0
  233. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/deepspeed.py +0 -0
  234. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/delinearize_llama4.py +0 -0
  235. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/demo_bundles.py +0 -0
  236. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/deploy_autopilot.py +0 -0
  237. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/deploy_measure.py +0 -0
  238. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/distill.py +0 -0
  239. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/dpo_variants.py +0 -0
  240. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/ebft_gdpo.py +0 -0
  241. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/encoding.py +0 -0
  242. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/errors.py +0 -0
  243. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/fetch_examples.py +0 -0
  244. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/flash_attn.py +0 -0
  245. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/fp8.py +0 -0
  246. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/freeze.py +0 -0
  247. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/fsdp.py +0 -0
  248. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/fsdp_consolidate.py +0 -0
  249. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/galore.py +0 -0
  250. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/gguf_quant.py +0 -0
  251. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/gpu.py +0 -0
  252. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/gpu_monitor.py +0 -0
  253. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/graceful_save.py +0 -0
  254. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/grad_accum.py +0 -0
  255. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/gradient_ckpt.py +0 -0
  256. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/grpo_long_context.py +0 -0
  257. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/grpo_variants.py +0 -0
  258. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/hf.py +0 -0
  259. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/hf_space.py +0 -0
  260. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/hubs.py +0 -0
  261. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/integrations.py +0 -0
  262. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/jinja_analyzer.py +0 -0
  263. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/kernel_picker.py +0 -0
  264. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/kv_cache.py +0 -0
  265. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/launcher.py +0 -0
  266. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/liger.py +0 -0
  267. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/llama_proxy.py +0 -0
  268. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/llama_server_timings.py +0 -0
  269. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/loftq_init.py +0 -0
  270. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/log_level.py +0 -0
  271. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/long_context.py +0 -0
  272. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/longlora.py +0 -0
  273. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/lr_finder.py +0 -0
  274. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/lr_groups.py +0 -0
  275. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/metrics.py +0 -0
  276. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/mii.py +0 -0
  277. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/mixed_precision.py +0 -0
  278. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/mlx.py +0 -0
  279. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/moe.py +0 -0
  280. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/moe_quant.py +0 -0
  281. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/multipack.py +0 -0
  282. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/multipack_sampler.py +0 -0
  283. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/multipack_trainer.py +0 -0
  284. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/neat_packing.py +0 -0
  285. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/ngram_spec.py +0 -0
  286. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/nlg_metrics.py +0 -0
  287. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/ollama.py +0 -0
  288. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/onboarding.py +0 -0
  289. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/optimizer_zoo.py +0 -0
  290. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/paths.py +0 -0
  291. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/peft_builder.py +0 -0
  292. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/peft_patches.py +0 -0
  293. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/peft_wiring.py +0 -0
  294. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/pipeline.py +0 -0
  295. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/preference_combine.py +0 -0
  296. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/profiler.py +0 -0
  297. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/profiling.py +0 -0
  298. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/profiling_v0_43.py +0 -0
  299. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/qat.py +0 -0
  300. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/qr_url.py +0 -0
  301. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/quality.py +0 -0
  302. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/quant_menu.py +0 -0
  303. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/reasoning_effort.py +0 -0
  304. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/reasoning_parser.py +0 -0
  305. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/recipe_dag.py +0 -0
  306. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/registry.py +0 -0
  307. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/relora.py +0 -0
  308. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/replay.py +0 -0
  309. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/ring_attention.py +0 -0
  310. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/run_cost.py +0 -0
  311. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/save_formats.py +0 -0
  312. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/server_tools.py +0 -0
  313. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/sglang.py +0 -0
  314. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/shortcuts.py +0 -0
  315. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/spec_pairing.py +0 -0
  316. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/spike_recovery.py +0 -0
  317. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/sse_train_stream.py +0 -0
  318. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/structured_output.py +0 -0
  319. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/sweep_config.py +0 -0
  320. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/tail_latency.py +0 -0
  321. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/tool_outputs.py +0 -0
  322. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/topology.py +0 -0
  323. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/tracing.py +0 -0
  324. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/trackers.py +0 -0
  325. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/trainer_plugins.py +0 -0
  326. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/trust_remote.py +0 -0
  327. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/tts.py +0 -0
  328. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/ui_env.py +0 -0
  329. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/unsloth.py +0 -0
  330. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/v028_features.py +0 -0
  331. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/vllm.py +0 -0
  332. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/vscode_setup.py +0 -0
  333. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/warmup.py +0 -0
  334. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_cli/utils/why.py +0 -0
  335. {soup_cli-0.53.2 → soup_cli-0.53.3}/soup_logo_svg.svg +0 -0
  336. {soup_cli-0.53.2 → soup_cli-0.53.3}/templates/chat.yaml +0 -0
  337. {soup_cli-0.53.2 → soup_cli-0.53.3}/templates/code.yaml +0 -0
  338. {soup_cli-0.53.2 → soup_cli-0.53.3}/templates/medical.yaml +0 -0
  339. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/__init__.py +0 -0
  340. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/conftest.py +0 -0
  341. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/qa/v053_qa.md +0 -0
  342. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_adapters.py +0 -0
  343. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_advanced_peft.py +0 -0
  344. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_assistant_mask.py +0 -0
  345. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_audio.py +0 -0
  346. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_auto_tuning.py +0 -0
  347. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_autopilot.py +0 -0
  348. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_awq_gptq_export.py +0 -0
  349. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_batch_probe.py +0 -0
  350. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_bco.py +0 -0
  351. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_bench.py +0 -0
  352. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_bugfixes.py +0 -0
  353. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_callback.py +0 -0
  354. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_cans.py +0 -0
  355. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_chat.py +0 -0
  356. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_chat_template.py +0 -0
  357. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_cli.py +0 -0
  358. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_cli_subprocess.py +0 -0
  359. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_config.py +0 -0
  360. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_cost.py +0 -0
  361. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_crash_reporter.py +0 -0
  362. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_curriculum.py +0 -0
  363. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_data.py +0 -0
  364. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_data_augment.py +0 -0
  365. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_data_sample.py +0 -0
  366. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_data_split.py +0 -0
  367. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_data_tools.py +0 -0
  368. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_dataset_hub.py +0 -0
  369. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_dataset_registry.py +0 -0
  370. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_deepspeed.py +0 -0
  371. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_deploy_ollama.py +0 -0
  372. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_diff.py +0 -0
  373. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_display.py +0 -0
  374. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_doctor.py +0 -0
  375. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_dpo_example.py +0 -0
  376. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_dpo_variants.py +0 -0
  377. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_embedding.py +0 -0
  378. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_errors.py +0 -0
  379. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_eval.py +0 -0
  380. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_eval_gate.py +0 -0
  381. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_eval_platform.py +0 -0
  382. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_export.py +0 -0
  383. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_formats.py +0 -0
  384. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_fp8_recipe.py +0 -0
  385. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_freeze_training.py +0 -0
  386. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_generate.py +0 -0
  387. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_gpu.py +0 -0
  388. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_grpo.py +0 -0
  389. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_hf_integration.py +0 -0
  390. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_infer.py +0 -0
  391. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_inference_advanced.py +0 -0
  392. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_init.py +0 -0
  393. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ipo.py +0 -0
  394. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_jinja_analyzer.py +0 -0
  395. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_kto.py +0 -0
  396. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_loader.py +0 -0
  397. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_log_level.py +0 -0
  398. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_loss_watchdog.py +0 -0
  399. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_merge.py +0 -0
  400. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_migrate.py +0 -0
  401. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_mlx_backend.py +0 -0
  402. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_moe.py +0 -0
  403. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_multi_adapter.py +0 -0
  404. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_multi_gpu.py +0 -0
  405. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_multipack_config.py +0 -0
  406. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_multipack_invariants.py +0 -0
  407. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_multipack_sampler.py +0 -0
  408. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_neat_packing.py +0 -0
  409. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_neftune_rslora.py +0 -0
  410. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_onnx_tensorrt_export.py +0 -0
  411. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_orpo.py +0 -0
  412. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_packing.py +0 -0
  413. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_a_wave1.py +0 -0
  414. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_a_wave2.py +0 -0
  415. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_b.py +0 -0
  416. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_c.py +0 -0
  417. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_d.py +0 -0
  418. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_e.py +0 -0
  419. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_part_f_hardening.py +0 -0
  420. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_peft_methods.py +0 -0
  421. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_peft_patches.py +0 -0
  422. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_performance.py +0 -0
  423. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_pissa_init.py +0 -0
  424. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ppo.py +0 -0
  425. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_preference_dispatcher.py +0 -0
  426. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_preference_multi.py +0 -0
  427. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_preference_multi_runtime.py +0 -0
  428. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_pretrain.py +0 -0
  429. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_profile.py +0 -0
  430. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_profiling.py +0 -0
  431. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_progress.py +0 -0
  432. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_push.py +0 -0
  433. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_qat.py +0 -0
  434. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_quality_filter.py +0 -0
  435. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_quant_check.py +0 -0
  436. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_quant_menu.py +0 -0
  437. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_quickstart.py +0 -0
  438. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_rank_pattern.py +0 -0
  439. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_recipes.py +0 -0
  440. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_recipes_v031.py +0 -0
  441. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_registry.py +0 -0
  442. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_relora.py +0 -0
  443. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_replay.py +0 -0
  444. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_resume.py +0 -0
  445. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_rlvr.py +0 -0
  446. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_run_cost.py +0 -0
  447. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_runs.py +0 -0
  448. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_serve.py +0 -0
  449. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_server_generate.py +0 -0
  450. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_sglang_serve.py +0 -0
  451. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_simpo.py +0 -0
  452. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_smoke_train.py +0 -0
  453. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_speculative_decoding.py +0 -0
  454. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_sweep.py +0 -0
  455. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_synth_data_pro.py +0 -0
  456. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_templates_yaml.py +0 -0
  457. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_tensorboard.py +0 -0
  458. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_tool_calling.py +0 -0
  459. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_trace_to_pref.py +0 -0
  460. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_tracker.py +0 -0
  461. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_trainer_coverage_v035.py +0 -0
  462. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_trainer_init.py +0 -0
  463. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_training_intelligence.py +0 -0
  464. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_training_speed.py +0 -0
  465. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_trust_remote_code.py +0 -0
  466. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_tui.py +0 -0
  467. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ui.py +0 -0
  468. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ui_chat.py +0 -0
  469. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ui_config_builder.py +0 -0
  470. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ui_live_monitor.py +0 -0
  471. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_ui_metrics.py +0 -0
  472. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_unsloth.py +0 -0
  473. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0401_part_c.py +0 -0
  474. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0401_part_d.py +0 -0
  475. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0401_part_e.py +0 -0
  476. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0402_part_a.py +0 -0
  477. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0402_part_b.py +0 -0
  478. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0403_part_a.py +0 -0
  479. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0403_part_b.py +0 -0
  480. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0403_part_c.py +0 -0
  481. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0404_part_a.py +0 -0
  482. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0404_part_b.py +0 -0
  483. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0405_part_a.py +0 -0
  484. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0406_part_a.py +0 -0
  485. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0410_part_a.py +0 -0
  486. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0410_part_b.py +0 -0
  487. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0410_part_c.py +0 -0
  488. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0420.py +0 -0
  489. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0430_part_a.py +0 -0
  490. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0430_part_b.py +0 -0
  491. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0430_part_c.py +0 -0
  492. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0430_part_d.py +0 -0
  493. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0440_part_a.py +0 -0
  494. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0440_part_b.py +0 -0
  495. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0440_part_c.py +0 -0
  496. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0440_part_d.py +0 -0
  497. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0440_review_followups.py +0 -0
  498. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0450.py +0 -0
  499. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0460_part_a.py +0 -0
  500. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0460_part_b.py +0 -0
  501. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0470_part_a.py +0 -0
  502. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0470_part_b.py +0 -0
  503. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0480_part_a.py +0 -0
  504. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0480_part_b.py +0 -0
  505. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0490.py +0 -0
  506. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0500_part_a.py +0 -0
  507. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0500_part_b.py +0 -0
  508. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0500_part_c.py +0 -0
  509. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0500_part_d.py +0 -0
  510. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0510.py +0 -0
  511. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0520.py +0 -0
  512. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0530.py +0 -0
  513. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0531_109.py +0 -0
  514. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0531_139.py +0 -0
  515. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0531_142.py +0 -0
  516. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0531_82.py +0 -0
  517. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_v0532.py +0 -0
  518. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_validator.py +0 -0
  519. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_vision.py +0 -0
  520. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_vllm_serve.py +0 -0
  521. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_why.py +0 -0
  522. {soup_cli-0.53.2 → soup_cli-0.53.3}/tests/test_windows_encoding.py +0 -0
@@ -111,7 +111,7 @@ soup_cli/
111
111
  templates/ - 17 built-in soup.yaml templates (YAML + manifest.json) with load_template loader (v0.39.0, +bco v0.40.0)
112
112
  ui/ - Web UI (FastAPI + HTML/JS SPA)
113
113
 
114
- tests/ - Test suite (185 files, 7842 tests)
114
+ tests/ - Test suite (186 files, 7879 tests)
115
115
  examples/ - Real-world config examples and datasets
116
116
  ```
117
117
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: soup-cli
3
- Version: 0.53.2
3
+ Version: 0.53.3
4
4
  Summary: Fine-tune LLMs in one command. No SSH, no config hell.
5
5
  Project-URL: Homepage, https://github.com/MakazhanAlpamys/Soup
6
6
  Project-URL: Repository, https://github.com/MakazhanAlpamys/Soup
@@ -134,14 +134,13 @@ soup train
134
134
 
135
135
  Latest highlights only. Full history: [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases).
136
136
 
137
- **v0.53.2 — Modality II live trainers**: Four v0.52.0 deferred stubs lifted into real, end-to-end-trainable wrappers — knowledge distillation, sequence classification, EBFT / GDPO loss kernels, and gpt-oss-style `reasoning_effort` system-prompt injection.
137
+ **v0.53.3 — GRPO Plus partial wiring**: Two surgical GRPO Plus fixes from the v0.50.0 deferred-stub family land cleanly. The four larger items (stability callback, variant loss kernels, PRM trainer, multi-objective preference live combine) are scope-deferred to v0.53.4 — each warrants a focused release because they each require deep TRL trainer subclassing.
138
138
 
139
- - **`soup train` with `task: distill`.** New `DistillTrainerWrapper`: student + frozen teacher both load via `AutoModelForCausalLM` (separate `trust_remote_code` resolution for each), KL / forward_KL / reverse_KL / JS divergence kernels scaled by `temperature**2` per the Hinton paper. Device-bridge: teacher inputs auto-move to the teacher's device, teacher logits move back onto the student's device before the KL kernel survives HF Trainer's auto-CUDA promotion on a CPU-tagged run. `DataCollatorForSeq2Seq(label_pad_token_id=-100)` handles variable-length pre-tokenised loss-masked rows correctly.
140
- - **`soup train` with `task: classifier | reranker | cross_encoder`.** New `ClassifierTrainerWrapper`: `AutoModelForSequenceClassification` with `num_labels` and `label_names`, auto-routes `single_label_classification` / `multi_label_classification` from `tcfg.classifier_kind`. Multi-label string labels resolved via the `label_names` map with a 1024-entry cap + dedup. Training Setup Panel renders `Head: num_labels=N, kind=...` instead of LoRA r/alpha for the classifier family.
141
- - **EBFT structured / strided + GDPO standard / length_normalized / margin loss kernels.** `apply_ebft_loss` and `apply_gdpo_loss` exit the v0.52.0 `NotImplementedError` stubs with finite-only-input guards and bool-rejected numeric params. `attach_ebft_compute_loss(trainer, tcfg)` (SFT) and `attach_gdpo_compute_loss(trainer, tcfg)` (DPO) wrap `Trainer.compute_loss` idempotentlyre-attach is a no-op via a marker attribute on the wrapped method. Auto-attached when the corresponding `*_variant` field is set on `TrainingConfig`.
142
- - **gpt-oss `reasoning_effort` + `train_on_eot`.** `apply_reasoning_effort_prefix(messages, level)` injects `<|reasoning_effort|>{low,medium,high}<|/reasoning_effort|>` into the system turn (creates one if absent), returning a new list (caller's messages immutable). `build_assistant_only_labels(train_on_eot=True)` keeps the EOT/EOS token unmasked at the assistant-turn boundary so the model learns when to stop. Both gated to the SFT-family at config-load.
143
- - **+120 net new tests** (7722 7842) across `test_v0532.py`. Four review agents (python / code / security / tdd) ran; every CRITICAL / HIGH / MEDIUM / LOW finding fixed — separate `trust_remote_code` resolution for student vs teacher, idempotent attach hooks with regression tests, 1024-entry multi-label cap, `dpo_margin` defaults to `None` (not `0.0`) so missing values raise rather than silently zero, source-grep regression guards on the trainer-routing call sites use the full instantiation expression (no comment-only false-positives), Panel renders the classifier head instead of LoRA r/alpha.
144
- - **Local end-to-end CPU smoke** confirms both new wrappers train 2 steps with finite loss on `hf-internal-testing/tiny-random-gpt2`. Two real bugs surfaced and were fixed during the smoke (collator label padding + teacher / student device mismatch) — both have source-level regression guards in the test suite. ONNX export QA: pipeline integrity proven on tiny-gpt2; TinyLlama-1.1B full export is host-RAM-bound (documented in `tests/qa/v053_qa.md`).
139
+ - **`grpo_fp16: true` is now wired** through `GRPOTrainerWrapper._build_precision_kwargs` — non-CUDA devices (CPU / MPS / XPU) skip HF Trainer's CUDA-specific `fp16`/`bf16` kwargs, CUDA + `grpo_fp16=True` forces FP16 for unsloth parity, default CUDA stays on bf16. The schema now rejects the silent-mutex combo `grpo_fp16=True + auto_mixed_precision=true` at config load with an actionable message naming both flags.
140
+ - **Vision GRPO base-model probe.** `KNOWN_VLM_REGEX` matches 10 VLM families (Qwen2-VL / Qwen2.5-VL / QVQ / Pixtral / InternVL / Llama-3.2-Vision / LLaVA / MiniCPM-V / Idefics / ShareGPT4V / Fuyu) with word-boundary anchors that reject substring noise like `"my-pixtralish"`. A YAML pairing `vision_grpo: true` with a non-VLM checkpoint is now rejected at schema load with a friendly message naming the expected families — instead of surfacing as a cryptic `"module has no attribute 'vision_tower'"` at trainer load time. Error message truncates the echoed base to 64 chars per the v0.34.0 redaction policy.
141
+ - **Validator ordering matters.** `_validate_grpo_fp16_amp_exclusive` short-circuits when `task != 'grpo'` so the v0.50.0 stability task-gate diagnosis ("`grpo_fp16` requires `task=grpo`") fires first regardless of validator execution order — keeps the most actionable error in front of the user.
142
+ - **+37 net new tests** (7842 → 7879) across `test_v0533.py`. Four review agents (python / code / security / tdd) ran; every HIGH / MEDIUM / LOW finding fixed — task-gate priority short-circuit, MPS device branch explicitly documented, 64-char error-message truncation, `Optional[object]` → `object` parameter cleanup, QVQ regex coverage, 512-char exact-boundary regression test, explicit `grpo_fp16: false` produces bf16 path.
143
+ - **Local YAML smoke** confirms both fixes round-trip via `load_config_from_string`: known VLM bases pass, non-VLM bases plus `vision_grpo: true` are rejected, and the `grpo_fp16` + `auto_mixed_precision` combo is rejected with both flag names in the message.
145
144
 
146
145
  ## Why Soup?
147
146
 
@@ -43,14 +43,13 @@ soup train
43
43
 
44
44
  Latest highlights only. Full history: [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases).
45
45
 
46
- **v0.53.2 — Modality II live trainers**: Four v0.52.0 deferred stubs lifted into real, end-to-end-trainable wrappers — knowledge distillation, sequence classification, EBFT / GDPO loss kernels, and gpt-oss-style `reasoning_effort` system-prompt injection.
47
-
48
- - **`soup train` with `task: distill`.** New `DistillTrainerWrapper`: student + frozen teacher both load via `AutoModelForCausalLM` (separate `trust_remote_code` resolution for each), KL / forward_KL / reverse_KL / JS divergence kernels scaled by `temperature**2` per the Hinton paper. Device-bridge: teacher inputs auto-move to the teacher's device, teacher logits move back onto the student's device before the KL kernel survives HF Trainer's auto-CUDA promotion on a CPU-tagged run. `DataCollatorForSeq2Seq(label_pad_token_id=-100)` handles variable-length pre-tokenised loss-masked rows correctly.
49
- - **`soup train` with `task: classifier | reranker | cross_encoder`.** New `ClassifierTrainerWrapper`: `AutoModelForSequenceClassification` with `num_labels` and `label_names`, auto-routes `single_label_classification` / `multi_label_classification` from `tcfg.classifier_kind`. Multi-label string labels resolved via the `label_names` map with a 1024-entry cap + dedup. Training Setup Panel renders `Head: num_labels=N, kind=...` instead of LoRA r/alpha for the classifier family.
50
- - **EBFT structured / strided + GDPO standard / length_normalized / margin loss kernels.** `apply_ebft_loss` and `apply_gdpo_loss` exit the v0.52.0 `NotImplementedError` stubs with finite-only-input guards and bool-rejected numeric params. `attach_ebft_compute_loss(trainer, tcfg)` (SFT) and `attach_gdpo_compute_loss(trainer, tcfg)` (DPO) wrap `Trainer.compute_loss` idempotentlyre-attach is a no-op via a marker attribute on the wrapped method. Auto-attached when the corresponding `*_variant` field is set on `TrainingConfig`.
51
- - **gpt-oss `reasoning_effort` + `train_on_eot`.** `apply_reasoning_effort_prefix(messages, level)` injects `<|reasoning_effort|>{low,medium,high}<|/reasoning_effort|>` into the system turn (creates one if absent), returning a new list (caller's messages immutable). `build_assistant_only_labels(train_on_eot=True)` keeps the EOT/EOS token unmasked at the assistant-turn boundary so the model learns when to stop. Both gated to the SFT-family at config-load.
52
- - **+120 net new tests** (7722 7842) across `test_v0532.py`. Four review agents (python / code / security / tdd) ran; every CRITICAL / HIGH / MEDIUM / LOW finding fixed — separate `trust_remote_code` resolution for student vs teacher, idempotent attach hooks with regression tests, 1024-entry multi-label cap, `dpo_margin` defaults to `None` (not `0.0`) so missing values raise rather than silently zero, source-grep regression guards on the trainer-routing call sites use the full instantiation expression (no comment-only false-positives), Panel renders the classifier head instead of LoRA r/alpha.
53
- - **Local end-to-end CPU smoke** confirms both new wrappers train 2 steps with finite loss on `hf-internal-testing/tiny-random-gpt2`. Two real bugs surfaced and were fixed during the smoke (collator label padding + teacher / student device mismatch) — both have source-level regression guards in the test suite. ONNX export QA: pipeline integrity proven on tiny-gpt2; TinyLlama-1.1B full export is host-RAM-bound (documented in `tests/qa/v053_qa.md`).
46
+ **v0.53.3 — GRPO Plus partial wiring**: Two surgical GRPO Plus fixes from the v0.50.0 deferred-stub family land cleanly. The four larger items (stability callback, variant loss kernels, PRM trainer, multi-objective preference live combine) are scope-deferred to v0.53.4 — each warrants a focused release because they each require deep TRL trainer subclassing.
47
+
48
+ - **`grpo_fp16: true` is now wired** through `GRPOTrainerWrapper._build_precision_kwargs` — non-CUDA devices (CPU / MPS / XPU) skip HF Trainer's CUDA-specific `fp16`/`bf16` kwargs, CUDA + `grpo_fp16=True` forces FP16 for unsloth parity, default CUDA stays on bf16. The schema now rejects the silent-mutex combo `grpo_fp16=True + auto_mixed_precision=true` at config load with an actionable message naming both flags.
49
+ - **Vision GRPO base-model probe.** `KNOWN_VLM_REGEX` matches 10 VLM families (Qwen2-VL / Qwen2.5-VL / QVQ / Pixtral / InternVL / Llama-3.2-Vision / LLaVA / MiniCPM-V / Idefics / ShareGPT4V / Fuyu) with word-boundary anchors that reject substring noise like `"my-pixtralish"`. A YAML pairing `vision_grpo: true` with a non-VLM checkpoint is now rejected at schema load with a friendly message naming the expected families — instead of surfacing as a cryptic `"module has no attribute 'vision_tower'"` at trainer load time. Error message truncates the echoed base to 64 chars per the v0.34.0 redaction policy.
50
+ - **Validator ordering matters.** `_validate_grpo_fp16_amp_exclusive` short-circuits when `task != 'grpo'` so the v0.50.0 stability task-gate diagnosis ("`grpo_fp16` requires `task=grpo`") fires first regardless of validator execution order — keeps the most actionable error in front of the user.
51
+ - **+37 net new tests** (7842 → 7879) across `test_v0533.py`. Four review agents (python / code / security / tdd) ran; every HIGH / MEDIUM / LOW finding fixed — task-gate priority short-circuit, MPS device branch explicitly documented, 64-char error-message truncation, `Optional[object]` → `object` parameter cleanup, QVQ regex coverage, 512-char exact-boundary regression test, explicit `grpo_fp16: false` produces bf16 path.
52
+ - **Local YAML smoke** confirms both fixes round-trip via `load_config_from_string`: known VLM bases pass, non-VLM bases plus `vision_grpo: true` are rejected, and the `grpo_fp16` + `auto_mixed_precision` combo is rejected with both flag names in the message.
54
53
 
55
54
  ## Why Soup?
56
55
 
@@ -9,7 +9,8 @@ We provide security updates for the following versions:
9
9
  - **Versions older than 3 minor versions:** No support
10
10
 
11
11
  Example:
12
- - v0.53.2 -- Full support (latest)
12
+ - v0.53.3 -- Full support (latest)
13
+ - v0.53.2 -- Full support
13
14
  - v0.53.1 -- Full support
14
15
  - v0.53.0 -- Full support
15
16
  - v0.52.0 -- Full support
@@ -148,6 +149,8 @@ No known critical vulnerabilities in current releases.
148
149
  - **v0.32.0 — Training Stability & Auto-Tuning**: `--find-lr-output` containment via shared `utils/paths.is_under_cwd` (prevents writes outside cwd); `save_lr_finder_report` rejects NaN / Infinity floats in `lrs` / `losses` and serialises with `allow_nan=False` (keeps the report parser-safe); `compute_lr_schedule` rejects non-positive `start_lr`, inverted ranges, and `num_steps` outside `[2, 10_000]`; `pick_mixed_precision` rejects empty / null-byte / >200-char model names and resolves multi-version quirks (`qwen2.5` vs `qwen2`, `phi-3.5` vs `phi-3`) by longest-substring-first iteration so an added family can never accidentally make a more-specific entry dead code; `compute_warmup_steps` clamps to `[10, 1000]` with a `ratio==0.0` short-circuit matching HF Trainer's "no warmup" convention; `SpikeRecoveryStrategy` is `@dataclass(frozen=True)` (post-construction mutation cannot bypass validation), `max_attempts ∈ [1, 10]`, `lr_decay ∈ (0, 1)`, `min_lr > 0`; cross-validator `_validate_spike_recovery_requires_watchdog` rejects `loss_spike_recovery=true, loss_watchdog=false` at config-load (fails fast instead of never triggering); `convergence_window ∈ [5, 10_000]`, `convergence_rel_tol ∈ (0, 1]`, `recommend_action` reuses `detect_plateau` so plateau heuristic stays single-source-of-truth; `GradAccumMonitor.recommend()` caps doubled `accum` at `MAX_ACCUM=1024` so a runaway advisory loop cannot blow up DataLoader prefetch; `generate_config` validates BOTH the YAML output path AND the embedded `decisions["output"]` field via `is_under_cwd` (closes the gap where a crafted `decisions["output"]="../../etc"` would have silently propagated into the rendered YAML)
149
150
  - **v0.34.0 — Observability & Dev UX**: `.crash` bundle generator (`utils/crash.py`) recursively redacts `hf_*` / `sk-*` / `Bearer …` token-shaped strings in any captured `config` and metric tail before serialisation, so a `.crash` file shared on a public GitHub issue cannot leak credentials; `output_dir` is reduced to `os.path.basename` so `$HOME` doesn't leak; `write_crash_bundle` uses `os.path.realpath + commonpath` for cwd containment (Windows-safe; raises `ValueError` not `PermissionError` so callers cannot silently swallow with `except OSError`); filename appends `secrets.token_hex(4)` so two crashes in the same UTC second don't collide; bundle truncated to `MAX_BUNDLE_BYTES=1_000_000`. `train.py` crash-write surfaces failures to the user (no silent missing-bundle). `profiling.py` `resolve_trace_path` rejects empty / `.` / `..` / `/` / `\\` / null-byte `run_id` (closes the `output_dir/profiles/../trace.json` escape) and uses `os.path.realpath + is_under_cwd`; profiles dir is created only on successful torch import (no stale empty dirs on torch-less CI). `tracker.get_run` LIKE-prefix match escapes `%` / `_` / `\\` and uses `ESCAPE '\\'` so a crafted `run_id` cannot widen the match (mirrors v0.26.0 registry policy). Lazy schema migration (`_ensure_schema`) tolerates the "duplicate column" race when two CLI processes start simultaneously on a fresh DB (fork-based multi-GPU training, TUI auto-refresh). `runs.py show/replay/clean` switched user `run_id` rendering to `markup_escape` and switched `clean` containment from broken `Path.resolve() + relative_to()` to project-standard `os.path.realpath + is_under_cwd`. `tui_app.py` lazy-imports `ExperimentTracker` and `markup_escape`s every DB-sourced string before passing into Textual widgets so a crafted base_model / experiment_name cannot inject `[bold red]…[/]` markup. 
`run_cost.estimate_run_cost_usd` rejects `bool` in `num_gpus` (bool is a subclass of int — same defence as v0.30.0 `Candidate.__post_init__`); duration clamped to `[0, 1 year]`; unknown GPU returns `None` so callers render `—` instead of fabricating `$0.00`. `log_level.parse_log_level` rejects non-string + null-byte input.
150
151
  - **v0.33.0 — Live Wire**: RLVR `code_exec_reward` adds OS-level isolation (Linux best-effort `os.unshare(CLONE_NEWUSER|CLONE_NEWNET|CLONE_NEWPID)`, macOS `sandbox-exec` with default-deny `MACOS_SANDBOX_PROFILE` narrowed to a 3-name `mach-lookup` allowlist to prevent DNS / NSURLSession bypass of `(deny network*)`); `prune_checkpoints` switches to TOCTOU-safe `os.lstat + S_ISLNK` + `shutil.rmtree(onerror=_abort_on_symlink)` so a symlink encountered mid-walk aborts rather than escapes; `run_gate` wraps each task scorer in a typed `try/except` so backend failures produce `score=None, error=str(exc)` (never silent `score=1.0`); `_parse_judge_url` removes the bare `http://` catch-all (defence-in-depth after the Pydantic GateTask validator); `soup can run` requires `--yes` or explicit consent callback and raises `ValueError` (not `PermissionError`, which is an `OSError` subclass that broad `except` blocks would swallow); GGUF `rglob` result for ollama deploy is `realpath+commonpath` checked against extract_dir (prevents symlink escape from a crafted can); `DeployTarget.path` validator normalises mixed `\\`/`/` separators before splitting (closes a Windows `..` bypass); `CAN_FORMAT_VERSION` 1→2 (additive — v1 still loads); `soup can publish` validates `repo_id` via `utils/hf.validate_repo_id`, resolves token via `resolve_token`, sanitises commit messages (first-line, 200-char cap), uses HTTPS-only HfApi; `_write_spike_recovery_hint` adds `is_under_cwd` containment check on `args.output_dir` from raw HF `TrainingArguments`; `lookup_entry_by_output_dir` emits `ResourceWarning` when 1000-row scan limit is hit (no silent miss); `CrossDocCollator` no longer mutates input feature dicts (HF Dataset rows are cached and reused — mutation broke subsequent batches); `Candidate` rejects `bool` in `score`/`latency_ms` (was sneaking past `int` isinstance check); `evaluate_candidate` latency mean now divides by *completed* prompts (excludes crashed) so a broken candidate isn't 
artificially fast; `auto_quant.run_auto_quant_picker` soft-falls-back to highest-scored candidate when no candidate clears `min_score` (server still binds); `build_logits_processors` returns `[]` when neither `outlines` nor `lm-format-enforcer` is installed (server degrades to free-form rather than 500); MII server uses loopback-only CORS, max_tokens cap [1, 16384], stream rejection, generic 500 with no stack-trace leak; `os.execvp` auto-reexec uses list args (no shell), all forwarded flags pre-validated; `cleanup_extract_dir` uses `os.path.commonpath` (Windows-safe) instead of `startswith`; `_run_subprocess` catches `TimeoutExpired` and returns rc=124 (coreutils convention) instead of an unhandled traceback; new `eval_results` and `tensorrt` artifact kinds in `RegistryStore._VALID_KINDS`
152
+ - **v0.53.3 — GRPO Plus partial wiring (#128 grpo_fp16, #129 vision-VLM probe)**: lifts two surgical v0.50.0 GRPO Plus deferred stubs while keeping the project's hardening invariants; the four larger items (#127 stability callback, #123 6 GRPO variant loss kernels, #126 PRMTrainerWrapper, #68 multi-objective preference live combine) are scope-deferred to v0.53.4. (#128 grpo_fp16 routing) New `_validate_grpo_fp16_amp_exclusive` SoupConfig cross-validator rejects the silent-mutex combo `grpo_fp16=True + auto_mixed_precision=True` at config load — both flags pick the mixed-precision dtype via different codepaths; combining them is a footgun where downstream behaviour depends on validator execution order. Cross-validator short-circuits when `task != 'grpo'` so the v0.50.0 stability task-gate diagnosis fires first (keeps the most actionable error at the front; code-review HIGH fix). New `GRPOTrainerWrapper._build_precision_kwargs(self) -> dict[str, bool]` returns the `{fp16, bf16}` HF kwargs per `(device, grpo_fp16)` matrix: non-CUDA (CPU / MPS / XPU) → both False (HF Trainer's fp16/bf16 kwargs are CUDA-specific, MPS / XPU use their own mixed-precision paths), CUDA + `grpo_fp16=True` → `fp16=True, bf16=False` (unsloth parity), default CUDA → `fp16=False, bf16=True` (legacy v0.50.0 path). Direct attribute access on `self.config.training.grpo_fp16` (no `getattr` fallback — Pydantic-guaranteed field). (#129 vision-GRPO base probe) New `soup_cli/utils/prm.KNOWN_VLM_REGEX` compiled regex with 10 word-boundary alternatives covering Qwen2-VL / Qwen2.5-VL / QVQ / Pixtral / InternVL / InternVL2_5 / InternVL3 / Llama-3.2-Vision (any size via `[a-z0-9._-]*vision` glob) / LLaVA / MiniCPM-V / Idefics / ShareGPT4V / Fuyu. Word-boundary idiom `(?:^|[^a-z0-9])…(?:[^a-z0-9]|$)` mirrors v0.39.0 `is_gemma4_model` / v0.44.0 `is_llama4_model` / v0.49.0 `is_llama_model` policy — rejects substring noise like `"my-pixtralish"`. 
New `is_known_vlm_base(name: object) -> bool` is defensive — returns False (never raises) on non-string / bool / empty / null-byte / `>_MAX_BASE_NAME_LEN=512`. Extended `validate_vision_grpo_compat` with optional `base: str | None = None` kwarg — `None` / empty-string skips the probe (back-compat for legacy v0.50.0 Part E callers); non-empty-non-VLM raises `ValueError` with friendly message naming the expected families (Qwen2-VL / Pixtral / InternVL / Llama-3.2-Vision / LLaVA / MiniCPM-V). Error message **truncates the echoed `base` to 64 chars** before serialisation (security-review MEDIUM fix mirroring v0.34.0 `crash.py` `output_dir` basename policy — defends against adversarial / long bases bloating error logs and from leaking unredacted user input into operator-facing tracebacks). `_validate_vision_grpo` in SoupConfig threads `base=self.base` so a YAML pairing `vision_grpo: true` with a non-VLM checkpoint is rejected at schema-load instead of surfacing as a cryptic `"module has no attribute 'vision_tower'"` runtime error. Test surface: 1 new test file (`test_v0533.py`) carrying 37 new tests covering: every `_build_precision_kwargs` matrix cell (CUDA + grpo_fp16 / default CUDA / CPU / MPS), every cross-validator branch (mutex rejection / task-gate priority / both-off pass), every regex alternative (Qwen2-VL / Pixtral / QVQ / Llama-3.2-Vision variants / negative matches), every defensive guard (bool / non-string / null-byte / 512-byte boundary), error-message truncation (security-review M regression), and end-to-end YAML load (happy + reject). Known limitations: (1) Scope-deferred — 4 larger v0.53.3 items moved to v0.53.4 because each requires deep TRL subclassing and warrants its own focused release; the v0.40.x stub-then-live cadence shipped 5 patch releases over 6 weeks, mirroring that here. (2) VLM allowlist is static name-regex only; a legitimate VLM published under an org whose checkpoint name lacks any of those tokens (e.g. 
a custom internal fork) is rejected at schema-load and operators must omit `vision_grpo: true` until a future release adds a runtime `model.config.vision_config` probe. (3) `_build_precision_kwargs` is GRPO-only — other RL trainers (PPO / RewardModel) follow their existing mixed-precision conventions. (v0.53.3)
153
+
151
154
  - **v0.53.2 — Modality II live trainers**: lifts four v0.52.0 deferred stubs (#137, #135, #133, #132) into real trainer wrappers while keeping the project's hardening invariants. (#137 reasoning_effort + train_on_eot) `apply_reasoning_effort_prefix` follows v0.41.0 / v0.51.0 validator policy (bool-first, null-byte / empty / oversize / case-insensitive normalisation); messages list is treated as immutable (returns a new list — matches v0.33.0 #47 `CrossDocCollator` policy). `build_assistant_only_labels(train_on_eot=True)` reuses the existing v0.36.0 mask infrastructure — same null-byte / max_length / bool guards. (#135 EBFT / GDPO) `apply_ebft_loss` and `apply_gdpo_loss` enforce **finite-only inputs** (`torch.isfinite` guard on tensor inputs + `math.isfinite` on scalar params) — NaN / Inf would silently corrupt training otherwise. `dpo_margin` defaults to `None` (not `0.0`) per security-review M3 fix: silent zeroing in the `margin` variant when the operator forgot to set the margin would have looked like training success but produced a meaningless gradient. Both attach hooks (`attach_ebft_compute_loss`, `attach_gdpo_compute_loss`) are **idempotent** via a marker attribute on the wrapped method — re-attach is a no-op and a dedicated test class verifies the invariant (code-review M2 fix). (#133 DistillTrainerWrapper) **Separate trust_remote_code resolution for student and teacher** (security-review L2 fix): `model_requires_trust_remote_code(teacher)` runs independently of the student probe, otherwise a malicious teacher could piggy-back on the student's opt-in. Teacher is loaded with `device_map="cpu" if device == "cpu" else "auto"`, frozen via `requires_grad_(False)` + `.eval()` immediately after load — never participates in gradient computation. 
`_DistillTrainer.compute_loss` device-bridge: `teacher_device = next(teacher_ref.parameters()).device`, `teacher_inputs.to(teacher_device)` before teacher forward, `teacher_logits.to(student_logits.device)` before KL kernel — defends against HF Trainer's auto-CUDA promotion silently producing cross-device `index_select` crashes. **DataCollator correctness fix** (surfaced during Wave 3 CPU smoke): `DataCollatorForLanguageModeling` does NOT pad pre-tokenised `labels` — switched to `DataCollatorForSeq2Seq(label_pad_token_id=-100, padding=True)` so variable-length loss-masked rows batch correctly without runtime crash. (#132 ClassifierTrainerWrapper) `_normalise_label` caps multi-label entries at **1024 per row** (matches v0.52.0 schema cap; security-review HIGH fix — unbounded would allow OOM via crafted JSONL), dedups via set conversion, validates `label_names` map entries reject null bytes + empty strings. `problem_type` is set explicitly from `tcfg.classifier_kind` (not silently inferred from labels) so a multi-label-shaped row in a single-label config raises rather than mis-trains. Training Setup Panel renders `Head: num_labels=N, kind=...` for classifier-family tasks instead of meaningless LoRA r/alpha lines (code-review L3 cosmetic fix — Panel no longer mis-represents what the wrapper is doing). (Cross-cutting) `commands/train.py` task routing branches added for `distill` and `classifier` / `reranker` / `cross_encoder` — source-grep regression guards in the test suite use the **full instantiation expression** `DistillTrainerWrapper(cfg, **trainer_kwargs)` so comment-only mentions of the class name cannot satisfy the regression check (TDD-review hardening). Both new factories (`build_distill_trainer`, `build_classifier_trainer`) reject unknown kwargs via Python signature contract — dedicated `pytest.raises(TypeError)` tests cover the path (TDD-review L1 fix). Test surface: 1 new test file (`test_v0532.py`) carrying 120 new tests across 14 classes. 
Known limitations: (1) `#71` TinyLlama-1.1B-LoRA full ONNX export is host-RAM-bound (≥16 GB free RAM needed for the `onnx.load(load_external_data=True)` post-process step); tiny-gpt2 smoke proves pipeline integrity — recorded in `tests/qa/v053_qa.md`. (2) Distillation supports same-tokenizer pairs only — cross-tokenizer (Llama → Qwen) needs a projection or sequence-level loss, out of scope. (3) Classifier wrapper has no LoRA path — full head + base training; LoRA classifier finetuning is a follow-up. (4) EBFT / GDPO auto-attach only fires when the corresponding `*_variant` field is set; manual `attach_*` invocation from custom training loops is supported and idempotent. (5) `reasoning_effort` injection happens at data-prep time inside `build_format_row`; changing the level between runs requires re-rendering the dataset. (v0.53.2)
152
155
 
153
156
  - **v0.53.1 — Quant Menu II + Export pipeline live**: lifts six v0.53.0 deferred stubs to live wiring while keeping the project's hardening invariants. New shared helper `soup_cli/utils/paths.enforce_under_cwd_and_no_symlink` consolidates the v0.33.0 #22 TOCTOU pattern (cwd containment via `os.path.realpath + os.path.commonpath` + `os.lstat + S_ISLNK` rejection) — used by `commands/merge.py`, `commands/export.py`, `utils/save_formats.py`, and `utils/gguf_quant.py` so the same boundary check fires at every CLI dispatch point. `merge_4bit` and `export_torchao` (`utils/save_formats.py`): cwd containment + symlink rejection on `merged_dir` / `model_dir` / `output_dir`; `load_quant_config` enforces `yaml.safe_load` only + 256 KB cap + extension allowlist (`.yaml`/`.yml`); **per-scheme closed kwarg allowlist** rejects dunder keys + unknown params before the splat into `torchao.<scheme>Config(**kwargs)` (security-review HIGH fix — `Int4WeightOnly` accepts `{group_size, inner_k_tiles}`, `NVFP4` accepts nothing extra). Corrected BNB-4bit skip-modules kwarg name from `llm_int8_skip_modules` to `bnb_4bit_skip_modules`. `export_advanced_gguf` (`utils/gguf_quant.py`): all three subprocess invocations (`convert_hf_to_gguf.py`, `llama-imatrix`, `llama-quantize`) use argv-list form with no shell, 30-min timeout, `sys.executable` for the convert script; `_run_convert_to_f16` realpath-verifies that `convert_hf_to_gguf.py` stays inside the `llama_cpp_dir` after resolution (security-review HIGH M5 fix — defends against a symlinked script escape). `_prepare_calibration_text` strips null bytes, collapses newlines to spaces, caps per-line at 8 KB + total at 50 MB (security-review M1), uses POSIX `O_NOFOLLOW` to refuse symlinks at the kernel level (security-review M3 — closes the TOCTOU window between the dispatch-time check and the actual `open()`); requires ≥ 1 usable row before invoking imatrix. 
`_safe_stderr` Rich-markup-escapes subprocess stderr before embedding in `RuntimeError` (security-review L4) so a crafted llama.cpp error cannot inject `[red]...[/]` into the operator-facing panel. UD-prefix stripped from flavour arg before passing to llama-quantize (`UD-Q4_K_XL` → `Q4_K_XL`). Calibration data path containment + symlink rejection fires at CLI dispatch in `commands/export.py::_export_gguf_advanced`. `detect_prequantized_format_from_path` (`autopilot/decisions.py`): cwd containment + `os.lstat + S_ISLNK` on `<model_dir>/config.json` (security-review HIGH H2 — out-of-cwd model paths silently return `None` to preserve soft-probe semantics so HF Hub repo IDs aren't rejected); null-byte rejection on `model_dir`. `commands/merge.py`: early `is_under_cwd(output)` check at CLI boundary (security-review M4) — consistent with the v0.20.0 / v0.40.2 containment-at-the-boundary policy. `deploy_measure.py`: cache file written atomically via `tempfile.mkstemp` + `os.replace` with `os.lstat + S_ISLNK` rejection on BOTH `load_cache` and `save_cache` (security-review M2 — was missing on the load side); env override `SOUP_DEPLOY_AUTOPILOT_CACHE` rejects null bytes + control chars before any path resolution and confines the override to home / cwd / tempdir; cache file gets best-effort 0o600 perms on POSIX (matches v0.26.0 registry.db policy); 1 MB cache-file cap. `_DEPLOY_MEASURE_BEFORE_GEN` / `_AFTER_FACTORY` module-level callables are documented as a non-public escape hatch (deferred until v0.46.1 live model-loader). Test surface: 4 new test files (`test_v0531_82.py` / `test_v0531_109.py` / `test_v0531_139.py` / `test_v0531_142.py`) carrying 112 new tests covering happy paths + failure modes + every security guard (POSIX symlink rejection, per-scheme kwarg allowlist, TOCTOU defences, `_MAX_CANDIDATES` cap, MINOR-verdict band, mxfp4 word boundary, BNB-alias detection, render-table markup escape). 
Known limitations: (1) `_DEPLOY_MEASURE_BEFORE_GEN` / `_AFTER_FACTORY` are a stop-gap until v0.46.1 ships first-party transformers / vLLM generator factories. (2) `#70` GGUF and `#72` AWQ/GPTQ manual QA smokes remain pending — require CUDA + llama.cpp build; recipes scripted in `tests/qa/v053_qa.md`. (3) BNB-4bit merge + TorchAO PTQ live happy-path is mock-covered only — CPU-only CI cannot execute the real BNB / torchao kernels. (4) `_prepare_calibration_text` accepts JSONL with `text` / `prompt` / `content` aliases + raw text fallback; other formats (parquet / markdown) are out of scope. (5) Cache key truncates `base_sha` to 16 hex chars at the call site (collision probability ≈ 1-in-2³² across ~4 billion entries). (6) Pre-quantized detection is heuristic — name regex + local `config.json` probe; HF Hub repo IDs without local download fall back to name-only matching. (7) `enforce_under_cwd_and_no_symlink` checks only the leaf path; deeper traversal relies on the per-file leaf check at each site. (v0.53.1)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "soup-cli"
7
- version = "0.53.2"
7
+ version = "0.53.3"
8
8
  description = "Fine-tune LLMs in one command. No SSH, no config hell."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -1,3 +1,3 @@
1
1
  """Soup CLI — Fine-tune LLMs in one command."""
2
2
 
3
- __version__ = "0.53.2"
3
+ __version__ = "0.53.3"
@@ -2512,6 +2512,7 @@ class SoupConfig(BaseModel):
2512
2512
  task=self.task,
2513
2513
  modality=self.modality,
2514
2514
  backend=self.backend,
2515
+ base=self.base, # v0.53.3 #129 — name-regex VLM probe
2515
2516
  )
2516
2517
  except ValueError as exc:
2517
2518
  raise ValueError(str(exc)) from exc
@@ -2557,6 +2558,35 @@ class SoupConfig(BaseModel):
2557
2558
  )
2558
2559
  return self
2559
2560
 
2561
+ @model_validator(mode="after")
2562
+ def _validate_grpo_fp16_amp_exclusive(self) -> "SoupConfig":
2563
+ """v0.53.3 #128 — ``grpo_fp16`` and ``auto_mixed_precision`` are
2564
+ mutually exclusive.
2565
+
2566
+ Both flags pick the mixed-precision dtype but go through different
2567
+ codepaths (``grpo_fp16`` forces ``fp16=True, bf16=False`` on
2568
+ GRPOConfig directly; ``auto_mixed_precision`` runs the v0.32.0
2569
+ per-model + per-GPU picker). Combining them is a footgun where the
2570
+ downstream behaviour depends on order-of-evaluation — fail fast at
2571
+ config-load with a friendly message naming both flags so the user
2572
+ picks one.
2573
+ """
2574
+ # Short-circuit when task is not 'grpo' so the v0.50.0 stability
2575
+ # task-gate error fires first (code-review HIGH fix — keeps a
2576
+ # consistent "wrong-task" diagnosis ahead of the mutual-exclusion
2577
+ # one, regardless of validator execution order).
2578
+ if self.task != "grpo":
2579
+ return self
2580
+ if self.training.grpo_fp16 and self.training.auto_mixed_precision:
2581
+ raise ValueError(
2582
+ "grpo_fp16=True and auto_mixed_precision=True are mutually "
2583
+ "exclusive — both pick the mixed-precision dtype but go "
2584
+ "through different codepaths. Pick one: grpo_fp16 forces "
2585
+ "FP16 (unsloth parity), auto_mixed_precision uses the "
2586
+ "v0.32.0 per-GPU picker."
2587
+ )
2588
+ return self
2589
+
2560
2590
  @model_validator(mode="after")
2561
2591
  def _validate_hub_supported(self) -> "SoupConfig":
2562
2592
  """v0.51.0 Part E — ``hub`` other than ``hf`` requires a non-mlx
@@ -54,6 +54,32 @@ class GRPOTrainerWrapper:
54
54
  self.tokenizer = None
55
55
  self.trainer = None
56
56
 
57
+ def _build_precision_kwargs(self) -> dict[str, bool]:
58
+ """Resolve fp16/bf16 kwargs for GRPOConfig (v0.53.3 #128).
59
+
60
+ Priority:
61
+ - Non-CUDA device (CPU / MPS / XPU) → no mixed precision (both
62
+ False). HF Trainer's fp16/bf16 kwargs are CUDA-specific; non-CUDA
63
+ backends must use their own mixed-precision path (MPS Metal,
64
+ XPU IPEX). Documented explicitly so future MPS work doesn't
65
+ regress this branch silently.
66
+ - ``grpo_fp16=True`` (CUDA) → ``fp16=True, bf16=False`` (unsloth
67
+ parity).
68
+ - Default CUDA → ``fp16=False, bf16=True`` (legacy v0.50.0 path).
69
+
70
+ ``auto_mixed_precision`` is mutually exclusive with ``grpo_fp16``
71
+ (rejected at schema load via ``_validate_grpo_fp16_amp_exclusive``);
72
+ when only ``auto_mixed_precision`` is set, the v0.32.0 picker runs
73
+ elsewhere in the training loop and overrides this default.
74
+ """
75
+ if self.device != "cuda":
76
+ return {"fp16": False, "bf16": False}
77
+ # grpo_fp16 is a Pydantic field with default=False; direct attribute
78
+ # access (no getattr fallback) so a typo would fail loudly.
79
+ if self.config.training.grpo_fp16:
80
+ return {"fp16": True, "bf16": False}
81
+ return {"fp16": False, "bf16": True}
82
+
57
83
  def setup(self, dataset: dict):
58
84
  """Load model, tokenizer, apply LoRA, create GRPO trainer."""
59
85
  from datasets import Dataset
@@ -166,7 +192,7 @@ class GRPOTrainerWrapper:
166
192
  "logging_steps": tcfg.logging_steps,
167
193
  "save_steps": tcfg.save_steps,
168
194
  "save_total_limit": 3,
169
- "bf16": self.device == "cuda",
195
+ **self._build_precision_kwargs(),
170
196
  "report_to": self.report_to,
171
197
  "remove_unused_columns": False,
172
198
  "deepspeed": self.deepspeed_config,
@@ -0,0 +1,175 @@
1
+ """PRM (Process Reward Model) — v0.50.0 Part E + v0.53.3 #129.
2
+
3
+ Schema helpers for the new ``task='prm'`` stepwise-supervised trainer.
4
+ The PRM data format (``data.format='prm'``) was schema-locked in v0.42.0
5
+ Part A; v0.50.0 promotes it to a first-class task with cross-validators.
6
+
7
+ v0.53.3 #129 extends :func:`validate_vision_grpo_compat` with an optional
8
+ ``base`` model name probe (``KNOWN_VLM_REGEX``) so a config that pairs
9
+ ``vision_grpo: true`` with a non-VLM checkpoint is rejected at schema-load
10
+ with an actionable message naming a known VLM family.
11
+
12
+ The actual PRM trainer wrapper (``soup_cli/trainer/prm.py``) is deferred
13
+ to v0.50.1 — mirrors v0.27.0 MII / v0.37.0 multipack / v0.41.0 LLaMA Pro /
14
+ v0.45.0 plugins / v0.49.0 LongLoRA stub-then-live pattern.
15
+
16
+ Security:
17
+ - Pure schema-time validation; no filesystem touch.
18
+ - All validators raise ``ValueError`` with actionable messages.
19
+ - Name-regex probe rejects null-byte / non-string / oversize inputs by
20
+ returning ``False`` (no exception — mirrors v0.39.0 ``is_gemma4_model``
21
+ / v0.44.0 ``is_llama4_model`` / v0.49.0 ``is_llama_model`` policy).
22
+ """
23
+
24
+ from __future__ import annotations
25
+
26
+ import re
27
+
28
+ # v0.53.3 #129 — case-insensitive name allowlist for VLM bases.
29
+ # Each alternative uses word-style boundaries so substring noise like
30
+ # ``"my-pixtralish"`` does not match. The list is deliberately small and
31
+ # additive — extending it does not break callers because callers always
32
+ # pass through :func:`is_known_vlm_base`.
33
+ _VLM_PATTERNS = (
34
+ r"(?:^|[^a-z0-9])qwen[\d.]*-vl(?:[^a-z0-9]|$)", # Qwen2-VL / Qwen2.5-VL
35
+ r"(?:^|[^a-z0-9])qvq(?:[^a-z0-9]|$)", # QVQ-72B
36
+ r"(?:^|[^a-z0-9])pixtral(?:[^a-z0-9]|$)", # Pixtral
37
+ r"(?:^|[^a-z0-9])internvl[\d._]*(?:[^a-z0-9]|$)", # InternVL/InternVL2_5/InternVL3
38
+ # Llama-3.2-Vision (any size in between, e.g. Llama-3.2-11B-Vision)
39
+ r"(?:^|[^a-z0-9])llama-?3\.?2[a-z0-9._-]*vision(?:[^a-z0-9]|$)",
40
+ r"(?:^|[^a-z0-9])llava(?:[^a-z0-9]|$)", # LLaVA
41
+ r"(?:^|[^a-z0-9])minicpm-?v(?:[^a-z0-9]|$)", # MiniCPM-V
42
+ r"(?:^|[^a-z0-9])idefics[\d]*(?:[^a-z0-9]|$)", # Idefics
43
+ r"(?:^|[^a-z0-9])sharegpt4v(?:[^a-z0-9]|$)", # ShareGPT4V
44
+ r"(?:^|[^a-z0-9])fuyu(?:[^a-z0-9]|$)", # Fuyu
45
+ )
46
+ KNOWN_VLM_REGEX = re.compile("|".join(_VLM_PATTERNS), re.IGNORECASE)
47
+
48
+ _MAX_BASE_NAME_LEN = 512
49
+
50
+
51
+ def is_known_vlm_base(name: object) -> bool:
52
+ """Best-effort check whether ``name`` matches a known VLM family.
53
+
54
+ Returns ``False`` (never raises) on any of: non-string, empty, null
55
+ byte, length > 512. Match is case-insensitive with word boundaries so
56
+ substring noise (``"my-pixtralish"``) does not false-positive — mirrors
57
+ v0.39.0 / v0.44.0 / v0.49.0 model-detection policy.
58
+ """
59
+ if isinstance(name, bool):
60
+ return False
61
+ if not isinstance(name, str):
62
+ return False
63
+ if not name:
64
+ return False
65
+ if "\x00" in name:
66
+ return False
67
+ if len(name) > _MAX_BASE_NAME_LEN:
68
+ return False
69
+ return KNOWN_VLM_REGEX.search(name) is not None
70
+
71
+
72
+ def validate_prm_compat(
73
+ *,
74
+ task: str,
75
+ data_format: str,
76
+ backend: str,
77
+ modality: str,
78
+ ) -> None:
79
+ """Schema-time gate for ``task='prm'``.
80
+
81
+ Rejects:
82
+ - non-PRM task (the function is intended to be called only when
83
+ ``task == 'prm'``; defence-in-depth).
84
+ - ``data.format`` not in ``{'prm', 'auto'}`` — PRM requires the
85
+ stepwise-supervised data shape from v0.42.0 Part A.
86
+ - ``backend='mlx'`` — PRM trainer is HF Trainer-specific.
87
+ - ``modality != 'text'`` — vision/audio PRM not modelled.
88
+ """
89
+ if not isinstance(task, str) or not task:
90
+ raise ValueError("task must be a non-empty string")
91
+ if task != "prm":
92
+ raise ValueError(
93
+ f"validate_prm_compat called with task={task!r} (expected 'prm')"
94
+ )
95
+ if not isinstance(data_format, str) or not data_format:
96
+ raise ValueError("data.format must be a non-empty string")
97
+ if data_format not in ("prm", "auto"):
98
+ raise ValueError(
99
+ f"task='prm' requires data.format in ('prm', 'auto'); "
100
+ f"got data.format={data_format!r}"
101
+ )
102
+ if backend == "mlx":
103
+ raise ValueError(
104
+ "task='prm' is not supported on backend=mlx in v0.50.0"
105
+ )
106
+ if modality != "text":
107
+ raise ValueError(
108
+ f"task='prm' requires modality='text'; got modality={modality!r}"
109
+ )
110
+
111
+
112
+ def validate_vision_grpo_compat(
113
+ *,
114
+ task: str,
115
+ modality: str,
116
+ backend: str,
117
+ base: str | None = None,
118
+ ) -> None:
119
+ """Schema-time gate for ``vision_grpo=True``.
120
+
121
+ Rejects on:
122
+ - task not in {'grpo', 'ppo'} (vision RL is only meaningful for RL);
123
+ - modality != 'vision' (the whole point of the flag);
124
+ - backend == 'mlx' (no VLM-RL on MLX);
125
+ - v0.53.3 #129: ``base`` (when supplied, non-empty) does not match a
126
+ known VLM family — the runtime trainer error would be cryptic
127
+ ("module has no attribute 'vision_tower'") so we surface a friendly
128
+ schema-load rejection naming the expected families instead.
129
+
130
+ ``base=None`` or empty-string skips the probe (backwards-compatible —
131
+ legacy callers from v0.50.0 Part E pass no ``base`` kwarg).
132
+ """
133
+ if not isinstance(task, str) or not task:
134
+ raise ValueError("task must be a non-empty string")
135
+ if task not in ("grpo", "ppo"):
136
+ raise ValueError(
137
+ f"vision_grpo requires task in ('grpo', 'ppo'); got task={task!r}"
138
+ )
139
+ if modality != "vision":
140
+ raise ValueError(
141
+ f"vision_grpo requires modality='vision'; got modality={modality!r}"
142
+ )
143
+ if backend == "mlx":
144
+ raise ValueError(
145
+ "vision_grpo is not supported on backend=mlx in v0.50.0"
146
+ )
147
+ # v0.53.3 #129 — name-regex probe (deliberately permissive: empty /
148
+ # None / non-string skips the probe).
149
+ if isinstance(base, str) and base and not is_known_vlm_base(base):
150
+ # Truncate the echoed value to keep adversarial / long bases from
151
+ # bloating error logs (security review fix; mirrors v0.34.0 crash
152
+ # redaction policy).
153
+ safe_base = base if len(base) <= 64 else base[:61] + "..."
154
+ raise ValueError(
155
+ f"vision_grpo=True requires a known VLM base; got base={safe_base!r}. "
156
+ "Expected one of the Qwen2-VL / Pixtral / InternVL / "
157
+ "Llama-3.2-Vision / LLaVA / MiniCPM-V families. If your base "
158
+ "is a legitimate VLM not in the allowlist, omit vision_grpo "
159
+ "until a future release adds a runtime config-probe path."
160
+ )
161
+
162
+
163
+ def build_prm_trainer() -> None:
164
+ """Live PRM trainer factory — deferred to v0.50.1.
165
+
166
+ Planned v0.50.1 signature:
167
+ ``build_prm_trainer(*, config, model, tokenizer, train_dataset, eval_dataset)``.
168
+
169
+ Raises ``NotImplementedError`` so callers cannot silently train an
170
+ SFT model when they asked for PRM.
171
+ """
172
+ raise NotImplementedError(
173
+ "PRM trainer (task='prm') live wiring deferred to v0.50.1. "
174
+ "Schema accepts the value but no trainer wrapper is registered yet."
175
+ )
@@ -92,9 +92,12 @@ def test_prm_compat_none_format():
92
92
 
93
93
 
94
94
  def test_vision_grpo_soupconfig_ppo_happy():
95
- """tdd-guide MEDIUM fix: confirm ppo path is wired at SoupConfig level."""
95
+ """tdd-guide MEDIUM fix: confirm ppo path is wired at SoupConfig level.
96
+
97
+ v0.53.3 #129 — uses a known VLM base so the new name-regex probe passes.
98
+ """
96
99
  yaml = """
97
- base: test-llama
100
+ base: Qwen/Qwen2-VL-7B-Instruct
98
101
  task: ppo
99
102
  modality: vision
100
103
  data:
@@ -221,8 +224,9 @@ training:
221
224
 
222
225
 
223
226
  def test_vision_grpo_soupconfig_happy():
227
+ # v0.53.3 #129 — uses a known VLM base.
224
228
  yaml = """
225
- base: test-llama
229
+ base: Qwen/Qwen2-VL-7B-Instruct
226
230
  task: grpo
227
231
  modality: vision
228
232
  data: