soup-cli 0.53.2__tar.gz → 0.53.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (525)
  1. {soup_cli-0.53.2 → soup_cli-0.53.4}/CONTRIBUTING.md +1 -1
  2. {soup_cli-0.53.2 → soup_cli-0.53.4}/PKG-INFO +31 -10
  3. {soup_cli-0.53.2 → soup_cli-0.53.4}/README.md +30 -9
  4. {soup_cli-0.53.2 → soup_cli-0.53.4}/SECURITY.md +6 -1
  5. {soup_cli-0.53.2 → soup_cli-0.53.4}/pyproject.toml +1 -1
  6. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/__init__.py +1 -1
  7. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/config/schema.py +30 -0
  8. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/grpo.py +27 -1
  9. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/pretrain.py +7 -0
  10. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/sft.py +11 -0
  11. soup_cli-0.53.4/soup_cli/utils/block_expansion.py +249 -0
  12. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/errors.py +8 -2
  13. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/flash_attn.py +25 -0
  14. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/long_context.py +23 -2
  15. soup_cli-0.53.4/soup_cli/utils/longlora.py +247 -0
  16. soup_cli-0.53.4/soup_cli/utils/prm.py +175 -0
  17. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/qa/v053_qa.md +59 -0
  18. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_errors.py +2 -1
  19. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0410_part_c.py +4 -2
  20. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0490.py +6 -3
  21. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0500_part_e.py +7 -3
  22. soup_cli-0.53.4/tests/test_v0533.py +375 -0
  23. soup_cli-0.53.4/tests/test_v0534.py +698 -0
  24. soup_cli-0.53.2/soup_cli/utils/block_expansion.py +0 -83
  25. soup_cli-0.53.2/soup_cli/utils/longlora.py +0 -111
  26. soup_cli-0.53.2/soup_cli/utils/prm.py +0 -100
  27. {soup_cli-0.53.2 → soup_cli-0.53.4}/.dockerignore +0 -0
  28. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/FUNDING.yml +0 -0
  29. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/ISSUE_TEMPLATE/bug_report.yml +0 -0
  30. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/ISSUE_TEMPLATE/config.yml +0 -0
  31. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/ISSUE_TEMPLATE/feature_request.yml +0 -0
  32. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/pull_request_template.md +0 -0
  33. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/workflows/ci.yml +0 -0
  34. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/workflows/docker.yml +0 -0
  35. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/workflows/publish.yml +0 -0
  36. {soup_cli-0.53.2 → soup_cli-0.53.4}/.github/workflows/recipe-validation.yml +0 -0
  37. {soup_cli-0.53.2 → soup_cli-0.53.4}/.gitignore +0 -0
  38. {soup_cli-0.53.2 → soup_cli-0.53.4}/CODEOWNERS +0 -0
  39. {soup_cli-0.53.2 → soup_cli-0.53.4}/CODE_OF_CONDUCT.md +0 -0
  40. {soup_cli-0.53.2 → soup_cli-0.53.4}/Dockerfile +0 -0
  41. {soup_cli-0.53.2 → soup_cli-0.53.4}/LICENSE +0 -0
  42. {soup_cli-0.53.2 → soup_cli-0.53.4}/NOTICE +0 -0
  43. {soup_cli-0.53.2 → soup_cli-0.53.4}/docker-compose.yml +0 -0
  44. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/README.md +0 -0
  45. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/dpo_chat.yaml +0 -0
  46. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/dpo_example.yaml +0 -0
  47. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/grpo_reasoning.yaml +0 -0
  48. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/rlhf_step1_sft.yaml +0 -0
  49. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/rlhf_step2_reward.yaml +0 -0
  50. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/rlhf_step3_ppo.yaml +0 -0
  51. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/sft_basic.yaml +0 -0
  52. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/configs/vision_llama.yaml +0 -0
  53. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/data/alpaca_tiny.jsonl +0 -0
  54. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/data/chat_preferences.jsonl +0 -0
  55. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/data/dpo_sample.jsonl +0 -0
  56. {soup_cli-0.53.2 → soup_cli-0.53.4}/examples/data/reasoning_math.jsonl +0 -0
  57. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup.png +0 -0
  58. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/__main__.py +0 -0
  59. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/autopilot/__init__.py +0 -0
  60. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/autopilot/analyzer.py +0 -0
  61. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/autopilot/decisions.py +0 -0
  62. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/autopilot/generate_config.py +0 -0
  63. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/__init__.py +0 -0
  64. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/pack.py +0 -0
  65. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/publish.py +0 -0
  66. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/run.py +0 -0
  67. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/schema.py +0 -0
  68. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/unpack.py +0 -0
  69. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cans/verify.py +0 -0
  70. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/cli.py +0 -0
  71. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/__init__.py +0 -0
  72. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/adapters.py +0 -0
  73. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/agent.py +0 -0
  74. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/autopilot.py +0 -0
  75. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/bench.py +0 -0
  76. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/can.py +0 -0
  77. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/chat.py +0 -0
  78. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/cost.py +0 -0
  79. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/data.py +0 -0
  80. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/data_forge.py +0 -0
  81. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/data_mix.py +0 -0
  82. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/data_score.py +0 -0
  83. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/delinearize_llama4.py +0 -0
  84. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/deploy.py +0 -0
  85. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/diff.py +0 -0
  86. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/doctor.py +0 -0
  87. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/eval.py +0 -0
  88. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/export.py +0 -0
  89. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/fetch.py +0 -0
  90. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/generate.py +0 -0
  91. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/history.py +0 -0
  92. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/infer.py +0 -0
  93. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/init.py +0 -0
  94. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/llama.py +0 -0
  95. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/merge.py +0 -0
  96. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/merge_sharded_fsdp_weights.py +0 -0
  97. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/migrate.py +0 -0
  98. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/monitor.py +0 -0
  99. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/plugins.py +0 -0
  100. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/profile.py +0 -0
  101. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/push.py +0 -0
  102. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/quantize.py +0 -0
  103. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/quickstart.py +0 -0
  104. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/recipes.py +0 -0
  105. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/registry.py +0 -0
  106. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/runs.py +0 -0
  107. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/serve.py +0 -0
  108. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/sweep.py +0 -0
  109. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/train.py +0 -0
  110. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/tui.py +0 -0
  111. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/ui.py +0 -0
  112. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/commands/why.py +0 -0
  113. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/config/__init__.py +0 -0
  114. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/config/loader.py +0 -0
  115. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/__init__.py +0 -0
  116. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/augment.py +0 -0
  117. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/chat_templates.py +0 -0
  118. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/collators.py +0 -0
  119. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/formats.py +0 -0
  120. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/loader.py +0 -0
  121. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/loss_mask.py +0 -0
  122. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/providers/__init__.py +0 -0
  123. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/providers/_utils.py +0 -0
  124. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/providers/anthropic.py +0 -0
  125. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/providers/ollama.py +0 -0
  126. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/providers/vllm.py +0 -0
  127. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/sft_format.py +0 -0
  128. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/__init__.py +0 -0
  129. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/code.py +0 -0
  130. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/conversation.py +0 -0
  131. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/preference.py +0 -0
  132. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/qa.py +0 -0
  133. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/reasoning.py +0 -0
  134. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/tool_calling.py +0 -0
  135. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/templates/verifiable.py +0 -0
  136. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/traces/__init__.py +0 -0
  137. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/traces/pair_builder.py +0 -0
  138. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/traces/parsers.py +0 -0
  139. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/traces/quality.py +0 -0
  140. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/data/validator.py +0 -0
  141. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/__init__.py +0 -0
  142. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/arena.py +0 -0
  143. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/benchmarks_v0_43.py +0 -0
  144. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/calibrate.py +0 -0
  145. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/checkpoint_intelligence.py +0 -0
  146. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/custom.py +0 -0
  147. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/forgetting.py +0 -0
  148. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/gate.py +0 -0
  149. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/human.py +0 -0
  150. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/judge.py +0 -0
  151. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/leaderboard.py +0 -0
  152. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/eval/quant_check.py +0 -0
  153. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/experiment/__init__.py +0 -0
  154. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/experiment/tracker.py +0 -0
  155. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/migrate/__init__.py +0 -0
  156. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/migrate/axolotl.py +0 -0
  157. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/migrate/common.py +0 -0
  158. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/migrate/llamafactory.py +0 -0
  159. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/migrate/unsloth.py +0 -0
  160. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/monitoring/__init__.py +0 -0
  161. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/monitoring/callback.py +0 -0
  162. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/monitoring/display.py +0 -0
  163. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/monitoring/hf_push.py +0 -0
  164. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/monitoring/trace_logger.py +0 -0
  165. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/plugins/__init__.py +0 -0
  166. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/recipes/__init__.py +0 -0
  167. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/recipes/catalog.py +0 -0
  168. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/registry/__init__.py +0 -0
  169. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/registry/attach.py +0 -0
  170. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/registry/diff.py +0 -0
  171. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/registry/hashing.py +0 -0
  172. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/registry/store.py +0 -0
  173. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/__init__.py +0 -0
  174. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/audio.yaml +0 -0
  175. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/bco.yaml +0 -0
  176. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/chat.yaml +0 -0
  177. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/code.yaml +0 -0
  178. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/embedding.yaml +0 -0
  179. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/fetch_examples/llama-3.1-8b-lora.yaml +0 -0
  180. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/fetch_examples/qwen2.5-7b-dpo.yaml +0 -0
  181. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/fetch_examples/zero3-cpu-offload.json +0 -0
  182. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/ipo.yaml +0 -0
  183. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/kto.yaml +0 -0
  184. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/longcontext.yaml +0 -0
  185. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/manifest.json +0 -0
  186. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/medical.yaml +0 -0
  187. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/moe.yaml +0 -0
  188. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/orpo.yaml +0 -0
  189. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/pretrain.yaml +0 -0
  190. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/reasoning.yaml +0 -0
  191. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/rlhf.yaml +0 -0
  192. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/simpo.yaml +0 -0
  193. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/tool-calling.yaml +0 -0
  194. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/templates/vision.yaml +0 -0
  195. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/__init__.py +0 -0
  196. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/bco.py +0 -0
  197. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/classifier.py +0 -0
  198. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/distill.py +0 -0
  199. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/dpo.py +0 -0
  200. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/embedding.py +0 -0
  201. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/ipo.py +0 -0
  202. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/kto.py +0 -0
  203. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/mlx_dpo.py +0 -0
  204. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/mlx_grpo.py +0 -0
  205. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/mlx_routing.py +0 -0
  206. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/mlx_sft.py +0 -0
  207. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/orpo.py +0 -0
  208. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/ppo.py +0 -0
  209. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/preference.py +0 -0
  210. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/reward_model.py +0 -0
  211. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/rewards.py +0 -0
  212. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/trainer/simpo.py +0 -0
  213. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/tui_app.py +0 -0
  214. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/__init__.py +0 -0
  215. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/app.py +0 -0
  216. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/plugins/__init__.py +0 -0
  217. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/static/app.js +0 -0
  218. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/static/index.html +0 -0
  219. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/static/logo.png +0 -0
  220. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/static/logo.svg +0 -0
  221. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/ui/static/style.css +0 -0
  222. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/__init__.py +0 -0
  223. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/activation_offload.py +0 -0
  224. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/advanced_precision.py +0 -0
  225. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/agent_forge.py +0 -0
  226. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/agent_rollout.py +0 -0
  227. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/anthropic_messages.py +0 -0
  228. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/auto_quant.py +0 -0
  229. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/batch_probe.py +0 -0
  230. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/bitnet.py +0 -0
  231. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/checkpoint_trigger.py +0 -0
  232. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/classifier.py +0 -0
  233. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/constants.py +0 -0
  234. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/convergence.py +0 -0
  235. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/crash.py +0 -0
  236. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/cross_doc_attn.py +0 -0
  237. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/curriculum.py +0 -0
  238. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/curriculum_dynamic.py +0 -0
  239. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/cut_ce.py +0 -0
  240. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/data_forge.py +0 -0
  241. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/data_mix.py +0 -0
  242. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/data_pipeline.py +0 -0
  243. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/data_score.py +0 -0
  244. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/deepspeed.py +0 -0
  245. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/delinearize_llama4.py +0 -0
  246. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/demo_bundles.py +0 -0
  247. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/deploy_autopilot.py +0 -0
  248. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/deploy_measure.py +0 -0
  249. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/distill.py +0 -0
  250. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/dpo_variants.py +0 -0
  251. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/ebft_gdpo.py +0 -0
  252. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/encoding.py +0 -0
  253. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/fetch_examples.py +0 -0
  254. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/fp8.py +0 -0
  255. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/freeze.py +0 -0
  256. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/fsdp.py +0 -0
  257. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/fsdp_consolidate.py +0 -0
  258. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/galore.py +0 -0
  259. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/gguf_quant.py +0 -0
  260. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/gpu.py +0 -0
  261. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/gpu_monitor.py +0 -0
  262. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/graceful_save.py +0 -0
  263. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/grad_accum.py +0 -0
  264. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/gradient_ckpt.py +0 -0
  265. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/grpo_long_context.py +0 -0
  266. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/grpo_variants.py +0 -0
  267. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/hf.py +0 -0
  268. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/hf_space.py +0 -0
  269. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/hubs.py +0 -0
  270. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/integrations.py +0 -0
  271. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/jinja_analyzer.py +0 -0
  272. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/kernel_picker.py +0 -0
  273. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/kv_cache.py +0 -0
  274. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/launcher.py +0 -0
  275. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/liger.py +0 -0
  276. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/llama_proxy.py +0 -0
  277. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/llama_server_timings.py +0 -0
  278. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/loftq_init.py +0 -0
  279. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/log_level.py +0 -0
  280. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/lr_finder.py +0 -0
  281. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/lr_groups.py +0 -0
  282. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/metrics.py +0 -0
  283. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/mii.py +0 -0
  284. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/mixed_precision.py +0 -0
  285. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/mlx.py +0 -0
  286. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/moe.py +0 -0
  287. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/moe_quant.py +0 -0
  288. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/multipack.py +0 -0
  289. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/multipack_sampler.py +0 -0
  290. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/multipack_trainer.py +0 -0
  291. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/neat_packing.py +0 -0
  292. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/ngram_spec.py +0 -0
  293. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/nlg_metrics.py +0 -0
  294. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/ollama.py +0 -0
  295. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/onboarding.py +0 -0
  296. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/optimizer_zoo.py +0 -0
  297. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/paths.py +0 -0
  298. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/peft_builder.py +0 -0
  299. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/peft_patches.py +0 -0
  300. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/peft_wiring.py +0 -0
  301. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/pipeline.py +0 -0
  302. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/preference_combine.py +0 -0
  303. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/profiler.py +0 -0
  304. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/profiling.py +0 -0
  305. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/profiling_v0_43.py +0 -0
  306. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/qat.py +0 -0
  307. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/qr_url.py +0 -0
  308. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/quality.py +0 -0
  309. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/quant_menu.py +0 -0
  310. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/reasoning_effort.py +0 -0
  311. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/reasoning_parser.py +0 -0
  312. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/recipe_dag.py +0 -0
  313. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/registry.py +0 -0
  314. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/relora.py +0 -0
  315. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/replay.py +0 -0
  316. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/ring_attention.py +0 -0
  317. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/run_cost.py +0 -0
  318. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/save_formats.py +0 -0
  319. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/server_tools.py +0 -0
  320. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/sglang.py +0 -0
  321. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/shortcuts.py +0 -0
  322. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/spec_pairing.py +0 -0
  323. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/spike_recovery.py +0 -0
  324. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/sse_train_stream.py +0 -0
  325. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/structured_output.py +0 -0
  326. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/sweep_config.py +0 -0
  327. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/tail_latency.py +0 -0
  328. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/tool_outputs.py +0 -0
  329. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/topology.py +0 -0
  330. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/tracing.py +0 -0
  331. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/trackers.py +0 -0
  332. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/trainer_plugins.py +0 -0
  333. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/trust_remote.py +0 -0
  334. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/tts.py +0 -0
  335. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/ui_env.py +0 -0
  336. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/unsloth.py +0 -0
  337. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/v028_features.py +0 -0
  338. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/vllm.py +0 -0
  339. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/vscode_setup.py +0 -0
  340. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/warmup.py +0 -0
  341. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_cli/utils/why.py +0 -0
  342. {soup_cli-0.53.2 → soup_cli-0.53.4}/soup_logo_svg.svg +0 -0
  343. {soup_cli-0.53.2 → soup_cli-0.53.4}/templates/chat.yaml +0 -0
  344. {soup_cli-0.53.2 → soup_cli-0.53.4}/templates/code.yaml +0 -0
  345. {soup_cli-0.53.2 → soup_cli-0.53.4}/templates/medical.yaml +0 -0
  346. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/__init__.py +0 -0
  347. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/conftest.py +0 -0
  348. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_adapters.py +0 -0
  349. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_advanced_peft.py +0 -0
  350. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_assistant_mask.py +0 -0
  351. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_audio.py +0 -0
  352. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_auto_tuning.py +0 -0
  353. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_autopilot.py +0 -0
  354. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_awq_gptq_export.py +0 -0
  355. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_batch_probe.py +0 -0
  356. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_bco.py +0 -0
  357. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_bench.py +0 -0
  358. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_bugfixes.py +0 -0
  359. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_callback.py +0 -0
  360. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_cans.py +0 -0
  361. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_chat.py +0 -0
  362. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_chat_template.py +0 -0
  363. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_cli.py +0 -0
  364. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_cli_subprocess.py +0 -0
  365. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_config.py +0 -0
  366. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_cost.py +0 -0
  367. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_crash_reporter.py +0 -0
  368. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_curriculum.py +0 -0
  369. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_data.py +0 -0
  370. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_data_augment.py +0 -0
  371. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_data_sample.py +0 -0
  372. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_data_split.py +0 -0
  373. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_data_tools.py +0 -0
  374. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_dataset_hub.py +0 -0
  375. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_dataset_registry.py +0 -0
  376. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_deepspeed.py +0 -0
  377. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_deploy_ollama.py +0 -0
  378. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_diff.py +0 -0
  379. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_display.py +0 -0
  380. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_doctor.py +0 -0
  381. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_dpo_example.py +0 -0
  382. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_dpo_variants.py +0 -0
  383. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_embedding.py +0 -0
  384. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_eval.py +0 -0
  385. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_eval_gate.py +0 -0
  386. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_eval_platform.py +0 -0
  387. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_export.py +0 -0
  388. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_formats.py +0 -0
  389. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_fp8_recipe.py +0 -0
  390. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_freeze_training.py +0 -0
  391. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_generate.py +0 -0
  392. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_gpu.py +0 -0
  393. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_grpo.py +0 -0
  394. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_hf_integration.py +0 -0
  395. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_infer.py +0 -0
  396. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_inference_advanced.py +0 -0
  397. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_init.py +0 -0
  398. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ipo.py +0 -0
  399. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_jinja_analyzer.py +0 -0
  400. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_kto.py +0 -0
  401. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_loader.py +0 -0
  402. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_log_level.py +0 -0
  403. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_loss_watchdog.py +0 -0
  404. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_merge.py +0 -0
  405. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_migrate.py +0 -0
  406. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_mlx_backend.py +0 -0
  407. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_moe.py +0 -0
  408. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_multi_adapter.py +0 -0
  409. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_multi_gpu.py +0 -0
  410. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_multipack_config.py +0 -0
  411. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_multipack_invariants.py +0 -0
  412. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_multipack_sampler.py +0 -0
  413. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_neat_packing.py +0 -0
  414. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_neftune_rslora.py +0 -0
  415. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_onnx_tensorrt_export.py +0 -0
  416. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_orpo.py +0 -0
  417. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_packing.py +0 -0
  418. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_a_wave1.py +0 -0
  419. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_a_wave2.py +0 -0
  420. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_b.py +0 -0
  421. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_c.py +0 -0
  422. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_d.py +0 -0
  423. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_e.py +0 -0
  424. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_part_f_hardening.py +0 -0
  425. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_peft_methods.py +0 -0
  426. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_peft_patches.py +0 -0
  427. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_performance.py +0 -0
  428. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_pissa_init.py +0 -0
  429. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ppo.py +0 -0
  430. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_preference_dispatcher.py +0 -0
  431. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_preference_multi.py +0 -0
  432. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_preference_multi_runtime.py +0 -0
  433. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_pretrain.py +0 -0
  434. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_profile.py +0 -0
  435. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_profiling.py +0 -0
  436. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_progress.py +0 -0
  437. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_push.py +0 -0
  438. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_qat.py +0 -0
  439. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_quality_filter.py +0 -0
  440. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_quant_check.py +0 -0
  441. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_quant_menu.py +0 -0
  442. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_quickstart.py +0 -0
  443. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_rank_pattern.py +0 -0
  444. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_recipes.py +0 -0
  445. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_recipes_v031.py +0 -0
  446. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_registry.py +0 -0
  447. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_relora.py +0 -0
  448. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_replay.py +0 -0
  449. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_resume.py +0 -0
  450. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_rlvr.py +0 -0
  451. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_run_cost.py +0 -0
  452. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_runs.py +0 -0
  453. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_serve.py +0 -0
  454. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_server_generate.py +0 -0
  455. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_sglang_serve.py +0 -0
  456. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_simpo.py +0 -0
  457. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_smoke_train.py +0 -0
  458. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_speculative_decoding.py +0 -0
  459. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_sweep.py +0 -0
  460. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_synth_data_pro.py +0 -0
  461. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_templates_yaml.py +0 -0
  462. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_tensorboard.py +0 -0
  463. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_tool_calling.py +0 -0
  464. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_trace_to_pref.py +0 -0
  465. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_tracker.py +0 -0
  466. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_trainer_coverage_v035.py +0 -0
  467. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_trainer_init.py +0 -0
  468. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_training_intelligence.py +0 -0
  469. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_training_speed.py +0 -0
  470. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_trust_remote_code.py +0 -0
  471. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_tui.py +0 -0
  472. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ui.py +0 -0
  473. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ui_chat.py +0 -0
  474. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ui_config_builder.py +0 -0
  475. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ui_live_monitor.py +0 -0
  476. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_ui_metrics.py +0 -0
  477. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_unsloth.py +0 -0
  478. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0401_part_c.py +0 -0
  479. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0401_part_d.py +0 -0
  480. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0401_part_e.py +0 -0
  481. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0402_part_a.py +0 -0
  482. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0402_part_b.py +0 -0
  483. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0403_part_a.py +0 -0
  484. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0403_part_b.py +0 -0
  485. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0403_part_c.py +0 -0
  486. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0404_part_a.py +0 -0
  487. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0404_part_b.py +0 -0
  488. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0405_part_a.py +0 -0
  489. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0406_part_a.py +0 -0
  490. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0410_part_a.py +0 -0
  491. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0410_part_b.py +0 -0
  492. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0420.py +0 -0
  493. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0430_part_a.py +0 -0
  494. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0430_part_b.py +0 -0
  495. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0430_part_c.py +0 -0
  496. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0430_part_d.py +0 -0
  497. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0440_part_a.py +0 -0
  498. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0440_part_b.py +0 -0
  499. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0440_part_c.py +0 -0
  500. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0440_part_d.py +0 -0
  501. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0440_review_followups.py +0 -0
  502. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0450.py +0 -0
  503. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0460_part_a.py +0 -0
  504. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0460_part_b.py +0 -0
  505. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0470_part_a.py +0 -0
  506. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0470_part_b.py +0 -0
  507. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0480_part_a.py +0 -0
  508. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0480_part_b.py +0 -0
  509. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0500_part_a.py +0 -0
  510. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0500_part_b.py +0 -0
  511. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0500_part_c.py +0 -0
  512. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0500_part_d.py +0 -0
  513. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0510.py +0 -0
  514. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0520.py +0 -0
  515. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0530.py +0 -0
  516. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0531_109.py +0 -0
  517. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0531_139.py +0 -0
  518. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0531_142.py +0 -0
  519. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0531_82.py +0 -0
  520. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_v0532.py +0 -0
  521. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_validator.py +0 -0
  522. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_vision.py +0 -0
  523. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_vllm_serve.py +0 -0
  524. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_why.py +0 -0
  525. {soup_cli-0.53.2 → soup_cli-0.53.4}/tests/test_windows_encoding.py +0 -0
@@ -111,7 +111,7 @@ soup_cli/
111
111
  templates/ - 17 built-in soup.yaml templates (YAML + manifest.json) with load_template loader (v0.39.0, +bco v0.40.0)
112
112
  ui/ - Web UI (FastAPI + HTML/JS SPA)
113
113
 
114
- tests/ - Test suite (185 files, 7842 tests)
114
+ tests/ - Test suite (187 files, 7935 tests)
115
115
  examples/ - Real-world config examples and datasets
116
116
  ```
117
117
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: soup-cli
3
- Version: 0.53.2
3
+ Version: 0.53.4
4
4
  Summary: Fine-tune LLMs in one command. No SSH, no config hell.
5
5
  Project-URL: Homepage, https://github.com/MakazhanAlpamys/Soup
6
6
  Project-URL: Repository, https://github.com/MakazhanAlpamys/Soup
@@ -134,14 +134,14 @@ soup train
134
134
 
135
135
  Latest highlights only. Full history: [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases).
136
136
 
137
- **v0.53.2Modality II live trainers**: Four v0.52.0 deferred stubs lifted into real, end-to-end-trainable wrappers knowledge distillation, sequence classification, EBFT / GDPO loss kernels, and gpt-oss-style `reasoning_effort` system-prompt injection.
137
+ **v0.53.4 — Long Context + Architecture**: Six closes — LongLoRA hardened, LLaMA Pro block expansion lifted from deferred stub to live wiring, and CUDA-OOM hint upgraded.
138
138
 
139
- - **`soup train` with `task: distill`.** New `DistillTrainerWrapper`: student + frozen teacher both load via `AutoModelForCausalLM` (separate `trust_remote_code` resolution for each), KL / forward_KL / reverse_KL / JS divergence kernels scaled by `temperature**2` per the Hinton paper. Device-bridge: teacher inputs auto-move to the teacher's device, teacher logits move back onto the student's device before the KL kernel survives HF Trainer's auto-CUDA promotion on a CPU-tagged run. `DataCollatorForSeq2Seq(label_pad_token_id=-100)` handles variable-length pre-tokenised loss-masked rows correctly.
140
- - **`soup train` with `task: classifier | reranker | cross_encoder`.** New `ClassifierTrainerWrapper`: `AutoModelForSequenceClassification` with `num_labels` and `label_names`, auto-routes `single_label_classification` / `multi_label_classification` from `tcfg.classifier_kind`. Multi-label string labels resolved via the `label_names` map with a 1024-entry cap + dedup. Training Setup Panel renders `Head: num_labels=N, kind=...` instead of LoRA r/alpha for the classifier family.
141
- - **EBFT structured / strided + GDPO standard / length_normalized / margin loss kernels.** `apply_ebft_loss` and `apply_gdpo_loss` exit the v0.52.0 `NotImplementedError` stubs with finite-only-input guards and bool-rejected numeric params. `attach_ebft_compute_loss(trainer, tcfg)` (SFT) and `attach_gdpo_compute_loss(trainer, tcfg)` (DPO) wrap `Trainer.compute_loss` idempotently re-attach is a no-op via a marker attribute on the wrapped method. Auto-attached when the corresponding `*_variant` field is set on `TrainingConfig`.
142
- - **gpt-oss `reasoning_effort` + `train_on_eot`.** `apply_reasoning_effort_prefix(messages, level)` injects `<|reasoning_effort|>{low,medium,high}<|/reasoning_effort|>` into the system turn (creates one if absent), returning a new list (caller's messages immutable). `build_assistant_only_labels(train_on_eot=True)` keeps the EOT/EOS token unmasked at the assistant-turn boundary so the model learns when to stop. Both gated to the SFT-family at config-load.
143
- - **+120 net new tests** (7722 → 7842) across `test_v0532.py`. Four review agents (python / code / security / tdd) ran; every CRITICAL / HIGH / MEDIUM / LOW finding fixed — separate `trust_remote_code` resolution for student vs teacher, idempotent attach hooks with regression tests, 1024-entry multi-label cap, `dpo_margin` defaults to `None` (not `0.0`) so missing values raise rather than silently zero, source-grep regression guards on the trainer-routing call sites use the full instantiation expression (no comment-only false-positives), Panel renders the classifier head instead of LoRA r/alpha.
144
- - **Local end-to-end CPU smoke** confirms both new wrappers train 2 steps with finite loss on `hf-internal-testing/tiny-random-gpt2`. Two real bugs surfaced and were fixed during the smoke (collator label padding + teacher / student device mismatch) both have source-level regression guards in the test suite. ONNX export QA: pipeline integrity proven on tiny-gpt2; TinyLlama-1.1B full export is host-RAM-bound (documented in `tests/qa/v053_qa.md`).
139
+ - **LLaMA Pro is live.** `soup train` and `soup data` pretraining now honour `training.expand_layers: N` by deep-copying the last N decoder blocks, zero-initialising their residual projections (so the appended block initially acts as identity per the LLaMA Pro paper §3.1), and appending to `model.layers`. Pair with `freeze_trainable_layers: N` to train only the new blocks. Centralised via `block_expansion.apply_block_expansion_if_configured` so SFT and Pretrain stay in lock-step.
140
+ - **LongLoRA arch allowlist expanded** to Llama / CodeLlama / Mistral / Qwen / Phi via word-boundary helpers (`is_mistral_model`, `is_qwen_model`, `is_phi_model`). Mixtral is intentionally excluded — its MoE attention requires a dedicated helper, still tracked for a future release.
141
+ - **LongLoRA + FlashAttention v3 reject.** New `flash_attn.is_flash_attn_v3_available()` probe — when FA-v3 is installed, the schema now rejects `use_longlora: true` at load time with an actionable error (S² shifted-sparse + FA-v3 custom-mask both rewrite the kernel; allowing both would silently corrupt outputs).
142
+ - **Llama 3.1 RoPE auto-detect.** Pass `rope_scaling_type: None` (or omit it) on a Llama 3.1 base, and `apply_long_context_config` now reads `model.config.rope_scaling`. If it carries a `llama3` block, the long-context path picks `LLAMA3_DEFAULT_*` instead of falling through to `dynamic`. Explicit caller picks still win.
143
+ - **CUDA-OOM hint upgrade.** `format_friendly_error` now points users at the exact CLI flags `--batch-size <half>` and `--grad-accum <double>` to preserve effective batch size before the legacy `quantization: 4bit` fallback.
144
+ - **+56 net new tests** (7879 → 7935) across the new `test_v0534.py`. Four review agents (python / code / security / tdd) ran; every CRITICAL → LOW finding was fixed — shared centralised helper for trainer drift, `is None` over falsy guards, defensive non-string surface on `is_supported_longlora_arch`, 64-char base-name truncation in error messages, null-byte rejection on `task`/`backend`, and a `warnings.warn` when block expansion runs on non-Llama-shaped architectures.
145
145
 
146
146
  ## Why Soup?
147
147
 
@@ -917,9 +917,9 @@ training:
917
917
 
918
918
  **YaRN.** Best quality for 4-8x extension. Tunables (`yarn_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`) only apply when `rope_scaling_type=yarn`; the schema rejects them otherwise. Pure-Python math kernels are exposed at `soup_cli.utils.long_context.yarn_*` for reference / config-emit. The actual RoPE rotation runs inside HF Transformers.
919
919
 
920
- **Llama 3.1 NTK-aware.** Use `rope_scaling_type: llama3` for the canonical Llama 3.1 frequency-band scaling (`scale_factor=8`, `low_freq_factor=1`, `high_freq_factor=4`, `old_context_len=8192`). `detect_llama3_rope_in_config` auto-detects the block in any HF model config dict.
920
+ **Llama 3.1 NTK-aware.** Use `rope_scaling_type: llama3` for the canonical Llama 3.1 frequency-band scaling (`scale_factor=8`, `low_freq_factor=1`, `high_freq_factor=4`, `old_context_len=8192`). `detect_llama3_rope_in_config` auto-detects the block in any HF model config dict. Omit `rope_scaling_type` from your YAML (so it stays `None`) on a Llama 3.1 base and `apply_long_context_config` will auto-pick `llama3` by reading `model.config.rope_scaling` at load time — explicit caller picks still win.
921
921
 
922
- **LongLoRA S² (schema-only this release).** `training.use_longlora: true` requires `task=sft`, `backend=transformers`, Llama-family base, and `use_ring_attention=false`. The schema gate fails fast at config load; live forward override mirroring LlamaFactory `model/model_utils/longlora.py` lands in v0.49.1.
922
+ **LongLoRA S² (schema-only this release).** `training.use_longlora: true` requires `task=sft`, `backend=transformers`, a base in the architecture allowlist (Llama / CodeLlama / Mistral / Qwen / Phi — Mixtral excluded), and `use_ring_attention=false`. The schema also rejects the combo with FlashAttention v3 installed (the S² custom-mask kernel conflicts with FA-v3 native custom-mask). The schema gate fails fast at config load; live forward override mirroring LlamaFactory `model/model_utils/longlora.py` lands in a follow-up release.
923
923
 
924
924
  ```yaml
925
925
  # Llama 3.1 with NTK-aware scaling out to 128k
@@ -931,6 +931,27 @@ data:
931
931
  max_length: 131072
932
932
  ```
933
933
 
934
+ ## LLaMA Pro Block Expansion
935
+
936
+ Add `N` zero-initialised transformer blocks to a base model and train **only the new blocks** — keeps the original behaviour intact while adding capacity for a new domain (per the LLaMA Pro paper, `arxiv.org/abs/2401.02415`).
937
+
938
+ ```yaml
939
+ # soup.yaml — LLaMA Pro continued-training on a Llama-3.1 base
940
+ base: meta-llama/Llama-3.1-8B
941
+ task: sft
942
+ data:
943
+ train: ./domain.jsonl
944
+ training:
945
+ expand_layers: 4 # append 4 zero-init decoder blocks
946
+ freeze_trainable_layers: 4 # train only the appended blocks
947
+ lr: 5e-5
948
+ epochs: 1
949
+ ```
950
+
951
+ **What happens at trainer start.** Soup deep-copies the last `expand_layers` decoder blocks, zero-inits each clone's residual projections (`mlp.down_proj` + `self_attn.o_proj`) so the appended block initially acts as identity, appends them to `model.model.layers`, and updates `config.num_hidden_layers`. When `freeze_trainable_layers > 0` is set, every parameter except the appended blocks is frozen — this is the canonical LLaMA Pro "train only new blocks" recipe.
952
+
953
+ **Scope.** Works on both `task: sft` and `task: pretrain` with `backend: transformers`. Bounds: `expand_layers ∈ [1, 64]`. Over-expansion (more new blocks than the base has layers) silently clamps to the base layer count. Non-Llama-shaped architectures (e.g. Falcon's `dense_4h_to_h`) emit a `warnings.warn` because the residual zero-init heuristic only matches the standard `down_proj` / `o_proj` names — the appended blocks are still appended + trainable, but lose the identity-init guarantee.
954
+
934
955
  ## Optimizer & PEFT Zoo
935
956
 
936
957
  Pick from a wider catalogue of optimizers, target individual modules with their own LR, and use quantization-aware LoRA initialisation:
@@ -43,14 +43,14 @@ soup train
43
43
 
44
44
  Latest highlights only. Full history: [GitHub Releases](https://github.com/MakazhanAlpamys/Soup/releases).
45
45
 
46
- **v0.53.2Modality II live trainers**: Four v0.52.0 deferred stubs lifted into real, end-to-end-trainable wrappers knowledge distillation, sequence classification, EBFT / GDPO loss kernels, and gpt-oss-style `reasoning_effort` system-prompt injection.
46
+ **v0.53.4 — Long Context + Architecture**: Six closes — LongLoRA hardened, LLaMA Pro block expansion lifted from deferred stub to live wiring, and CUDA-OOM hint upgraded.
47
47
 
48
- - **`soup train` with `task: distill`.** New `DistillTrainerWrapper`: student + frozen teacher both load via `AutoModelForCausalLM` (separate `trust_remote_code` resolution for each), KL / forward_KL / reverse_KL / JS divergence kernels scaled by `temperature**2` per the Hinton paper. Device-bridge: teacher inputs auto-move to the teacher's device, teacher logits move back onto the student's device before the KL kernel survives HF Trainer's auto-CUDA promotion on a CPU-tagged run. `DataCollatorForSeq2Seq(label_pad_token_id=-100)` handles variable-length pre-tokenised loss-masked rows correctly.
49
- - **`soup train` with `task: classifier | reranker | cross_encoder`.** New `ClassifierTrainerWrapper`: `AutoModelForSequenceClassification` with `num_labels` and `label_names`, auto-routes `single_label_classification` / `multi_label_classification` from `tcfg.classifier_kind`. Multi-label string labels resolved via the `label_names` map with a 1024-entry cap + dedup. Training Setup Panel renders `Head: num_labels=N, kind=...` instead of LoRA r/alpha for the classifier family.
50
- - **EBFT structured / strided + GDPO standard / length_normalized / margin loss kernels.** `apply_ebft_loss` and `apply_gdpo_loss` exit the v0.52.0 `NotImplementedError` stubs with finite-only-input guards and bool-rejected numeric params. `attach_ebft_compute_loss(trainer, tcfg)` (SFT) and `attach_gdpo_compute_loss(trainer, tcfg)` (DPO) wrap `Trainer.compute_loss` idempotently re-attach is a no-op via a marker attribute on the wrapped method. Auto-attached when the corresponding `*_variant` field is set on `TrainingConfig`.
51
- - **gpt-oss `reasoning_effort` + `train_on_eot`.** `apply_reasoning_effort_prefix(messages, level)` injects `<|reasoning_effort|>{low,medium,high}<|/reasoning_effort|>` into the system turn (creates one if absent), returning a new list (caller's messages immutable). `build_assistant_only_labels(train_on_eot=True)` keeps the EOT/EOS token unmasked at the assistant-turn boundary so the model learns when to stop. Both gated to the SFT-family at config-load.
52
- - **+120 net new tests** (7722 → 7842) across `test_v0532.py`. Four review agents (python / code / security / tdd) ran; every CRITICAL / HIGH / MEDIUM / LOW finding fixed — separate `trust_remote_code` resolution for student vs teacher, idempotent attach hooks with regression tests, 1024-entry multi-label cap, `dpo_margin` defaults to `None` (not `0.0`) so missing values raise rather than silently zero, source-grep regression guards on the trainer-routing call sites use the full instantiation expression (no comment-only false-positives), Panel renders the classifier head instead of LoRA r/alpha.
53
- - **Local end-to-end CPU smoke** confirms both new wrappers train 2 steps with finite loss on `hf-internal-testing/tiny-random-gpt2`. Two real bugs surfaced and were fixed during the smoke (collator label padding + teacher / student device mismatch) both have source-level regression guards in the test suite. ONNX export QA: pipeline integrity proven on tiny-gpt2; TinyLlama-1.1B full export is host-RAM-bound (documented in `tests/qa/v053_qa.md`).
48
+ - **LLaMA Pro is live.** `soup train` and `soup data` pretraining now honour `training.expand_layers: N` by deep-copying the last N decoder blocks, zero-initialising their residual projections (so the appended block initially acts as identity per the LLaMA Pro paper §3.1), and appending to `model.layers`. Pair with `freeze_trainable_layers: N` to train only the new blocks. Centralised via `block_expansion.apply_block_expansion_if_configured` so SFT and Pretrain stay in lock-step.
49
+ - **LongLoRA arch allowlist expanded** to Llama / CodeLlama / Mistral / Qwen / Phi via word-boundary helpers (`is_mistral_model`, `is_qwen_model`, `is_phi_model`). Mixtral is intentionally excluded — its MoE attention requires a dedicated helper, still tracked for a future release.
50
+ - **LongLoRA + FlashAttention v3 reject.** New `flash_attn.is_flash_attn_v3_available()` probe — when FA-v3 is installed, the schema now rejects `use_longlora: true` at load time with an actionable error (S² shifted-sparse + FA-v3 custom-mask both rewrite the kernel; allowing both would silently corrupt outputs).
51
+ - **Llama 3.1 RoPE auto-detect.** Pass `rope_scaling_type: None` (or omit it) on a Llama 3.1 base, and `apply_long_context_config` now reads `model.config.rope_scaling`. If it carries a `llama3` block, the long-context path picks `LLAMA3_DEFAULT_*` instead of falling through to `dynamic`. Explicit caller picks still win.
52
+ - **CUDA-OOM hint upgrade.** `format_friendly_error` now points users at the exact CLI flags `--batch-size <half>` and `--grad-accum <double>` to preserve effective batch size before the legacy `quantization: 4bit` fallback.
53
+ - **+56 net new tests** (7879 → 7935) across the new `test_v0534.py`. Four review agents (python / code / security / tdd) ran; every CRITICAL → LOW finding was fixed — shared centralised helper for trainer drift, `is None` over falsy guards, defensive non-string surface on `is_supported_longlora_arch`, 64-char base-name truncation in error messages, null-byte rejection on `task`/`backend`, and a `warnings.warn` when block expansion runs on non-Llama-shaped architectures.
54
54
 
55
55
  ## Why Soup?
56
56
 
@@ -826,9 +826,9 @@ training:
826
826
 
827
827
  **YaRN.** Best quality for 4-8x extension. Tunables (`yarn_factor`, `yarn_attn_factor`, `yarn_beta_fast`, `yarn_beta_slow`) only apply when `rope_scaling_type=yarn`; the schema rejects them otherwise. Pure-Python math kernels are exposed at `soup_cli.utils.long_context.yarn_*` for reference / config-emit. The actual RoPE rotation runs inside HF Transformers.
828
828
 
829
- **Llama 3.1 NTK-aware.** Use `rope_scaling_type: llama3` for the canonical Llama 3.1 frequency-band scaling (`scale_factor=8`, `low_freq_factor=1`, `high_freq_factor=4`, `old_context_len=8192`). `detect_llama3_rope_in_config` auto-detects the block in any HF model config dict.
829
+ **Llama 3.1 NTK-aware.** Use `rope_scaling_type: llama3` for the canonical Llama 3.1 frequency-band scaling (`scale_factor=8`, `low_freq_factor=1`, `high_freq_factor=4`, `old_context_len=8192`). `detect_llama3_rope_in_config` auto-detects the block in any HF model config dict. Omit `rope_scaling_type` from your YAML (so it stays `None`) on a Llama 3.1 base and `apply_long_context_config` will auto-pick `llama3` by reading `model.config.rope_scaling` at load time — explicit caller picks still win.
830
830
 
831
- **LongLoRA S² (schema-only this release).** `training.use_longlora: true` requires `task=sft`, `backend=transformers`, Llama-family base, and `use_ring_attention=false`. The schema gate fails fast at config load; live forward override mirroring LlamaFactory `model/model_utils/longlora.py` lands in v0.49.1.
831
+ **LongLoRA S² (schema-only this release).** `training.use_longlora: true` requires `task=sft`, `backend=transformers`, a base in the architecture allowlist (Llama / CodeLlama / Mistral / Qwen / Phi — Mixtral excluded), and `use_ring_attention=false`. The schema also rejects the combo with FlashAttention v3 installed (the S² custom-mask kernel conflicts with FA-v3 native custom-mask). The schema gate fails fast at config load; live forward override mirroring LlamaFactory `model/model_utils/longlora.py` lands in a follow-up release.
832
832
 
833
833
  ```yaml
834
834
  # Llama 3.1 with NTK-aware scaling out to 128k
@@ -840,6 +840,27 @@ data:
840
840
  max_length: 131072
841
841
  ```
842
842
 
843
+ ## LLaMA Pro Block Expansion
844
+
845
+ Add `N` zero-initialised transformer blocks to a base model and train **only the new blocks** — keeps the original behaviour intact while adding capacity for a new domain (per the LLaMA Pro paper, `arxiv.org/abs/2401.02415`).
846
+
847
+ ```yaml
848
+ # soup.yaml — LLaMA Pro continued-training on a Llama-3.1 base
849
+ base: meta-llama/Llama-3.1-8B
850
+ task: sft
851
+ data:
852
+ train: ./domain.jsonl
853
+ training:
854
+ expand_layers: 4 # append 4 zero-init decoder blocks
855
+ freeze_trainable_layers: 4 # train only the appended blocks
856
+ lr: 5e-5
857
+ epochs: 1
858
+ ```
859
+
860
+ **What happens at trainer start.** Soup deep-copies the last `expand_layers` decoder blocks, zero-inits each clone's residual projections (`mlp.down_proj` + `self_attn.o_proj`) so the appended block initially acts as identity, appends them to `model.model.layers`, and updates `config.num_hidden_layers`. When `freeze_trainable_layers > 0` is set, every parameter except the appended blocks is frozen — this is the canonical LLaMA Pro "train only new blocks" recipe.
861
+
862
+ **Scope.** Works on both `task: sft` and `task: pretrain` with `backend: transformers`. Bounds: `expand_layers ∈ [1, 64]`. Over-expansion (more new blocks than the base has layers) silently clamps to the base layer count. Non-Llama-shaped architectures (e.g. Falcon's `dense_4h_to_h`) emit a `warnings.warn` because the residual zero-init heuristic only matches the standard `down_proj` / `o_proj` names — the appended blocks are still appended + trainable, but lose the identity-init guarantee.
863
+
843
864
  ## Optimizer & PEFT Zoo
844
865
 
845
866
  Pick from a wider catalogue of optimizers, target individual modules with their own LR, and use quantization-aware LoRA initialisation:
@@ -9,7 +9,9 @@ We provide security updates for the following versions:
9
9
  - **Versions older than 3 minor versions:** No support
10
10
 
11
11
  Example:
12
- - v0.53.2 -- Full support (latest)
12
+ - v0.53.4 -- Full support (latest)
13
+ - v0.53.3 -- Full support
14
+ - v0.53.2 -- Full support
13
15
  - v0.53.1 -- Full support
14
16
  - v0.53.0 -- Full support
15
17
  - v0.52.0 -- Full support
@@ -148,6 +150,9 @@ No known critical vulnerabilities in current releases.
148
150
  - **v0.32.0 — Training Stability & Auto-Tuning**: `--find-lr-output` containment via shared `utils/paths.is_under_cwd` (prevents writes outside cwd); `save_lr_finder_report` rejects NaN / Infinity floats in `lrs` / `losses` and serialises with `allow_nan=False` (keeps the report parser-safe); `compute_lr_schedule` rejects non-positive `start_lr`, inverted ranges, and `num_steps` outside `[2, 10_000]`; `pick_mixed_precision` rejects empty / null-byte / >200-char model names and resolves multi-version quirks (`qwen2.5` vs `qwen2`, `phi-3.5` vs `phi-3`) by longest-substring-first iteration so an added family can never accidentally make a more-specific entry dead code; `compute_warmup_steps` clamps to `[10, 1000]` with a `ratio==0.0` short-circuit matching HF Trainer's "no warmup" convention; `SpikeRecoveryStrategy` is `@dataclass(frozen=True)` (post-construction mutation cannot bypass validation), `max_attempts ∈ [1, 10]`, `lr_decay ∈ (0, 1)`, `min_lr > 0`; cross-validator `_validate_spike_recovery_requires_watchdog` rejects `loss_spike_recovery=true, loss_watchdog=false` at config-load (fails fast instead of never triggering); `convergence_window ∈ [5, 10_000]`, `convergence_rel_tol ∈ (0, 1]`, `recommend_action` reuses `detect_plateau` so plateau heuristic stays single-source-of-truth; `GradAccumMonitor.recommend()` caps doubled `accum` at `MAX_ACCUM=1024` so a runaway advisory loop cannot blow up DataLoader prefetch; `generate_config` validates BOTH the YAML output path AND the embedded `decisions["output"]` field via `is_under_cwd` (closes the gap where a crafted `decisions["output"]="../../etc"` would have silently propagated into the rendered YAML)
149
151
  - **v0.34.0 — Observability & Dev UX**: `.crash` bundle generator (`utils/crash.py`) recursively redacts `hf_*` / `sk-*` / `Bearer …` token-shaped strings in any captured `config` and metric tail before serialisation, so a `.crash` file shared on a public GitHub issue cannot leak credentials; `output_dir` is reduced to `os.path.basename` so `$HOME` doesn't leak; `write_crash_bundle` uses `os.path.realpath + commonpath` for cwd containment (Windows-safe; raises `ValueError` not `PermissionError` so callers cannot silently swallow with `except OSError`); filename appends `secrets.token_hex(4)` so two crashes in the same UTC second don't collide; bundle truncated to `MAX_BUNDLE_BYTES=1_000_000`. `train.py` crash-write surfaces failures to the user (no silent missing-bundle). `profiling.py` `resolve_trace_path` rejects empty / `.` / `..` / `/` / `\\` / null-byte `run_id` (closes the `output_dir/profiles/../trace.json` escape) and uses `os.path.realpath + is_under_cwd`; profiles dir is created only on successful torch import (no stale empty dirs on torch-less CI). `tracker.get_run` LIKE-prefix match escapes `%` / `_` / `\\` and uses `ESCAPE '\\'` so a crafted `run_id` cannot widen the match (mirrors v0.26.0 registry policy). Lazy schema migration (`_ensure_schema`) tolerates the "duplicate column" race when two CLI processes start simultaneously on a fresh DB (fork-based multi-GPU training, TUI auto-refresh). `runs.py show/replay/clean` switched user `run_id` rendering to `markup_escape` and switched `clean` containment from broken `Path.resolve() + relative_to()` to project-standard `os.path.realpath + is_under_cwd`. `tui_app.py` lazy-imports `ExperimentTracker` and `markup_escape`s every DB-sourced string before passing into Textual widgets so a crafted base_model / experiment_name cannot inject `[bold red]…[/]` markup. 
`run_cost.estimate_run_cost_usd` rejects `bool` in `num_gpus` (bool is a subclass of int — same defence as v0.30.0 `Candidate.__post_init__`); duration clamped to `[0, 1 year]`; unknown GPU returns `None` so callers render `—` instead of fabricating `$0.00`. `log_level.parse_log_level` rejects non-string + null-byte input.
150
152
  - **v0.33.0 — Live Wire**: RLVR `code_exec_reward` adds OS-level isolation (Linux best-effort `os.unshare(CLONE_NEWUSER|CLONE_NEWNET|CLONE_NEWPID)`, macOS `sandbox-exec` with default-deny `MACOS_SANDBOX_PROFILE` narrowed to a 3-name `mach-lookup` allowlist to prevent DNS / NSURLSession bypass of `(deny network*)`); `prune_checkpoints` switches to TOCTOU-safe `os.lstat + S_ISLNK` + `shutil.rmtree(onerror=_abort_on_symlink)` so a symlink encountered mid-walk aborts rather than escapes; `run_gate` wraps each task scorer in a typed `try/except` so backend failures produce `score=None, error=str(exc)` (never silent `score=1.0`); `_parse_judge_url` removes the bare `http://` catch-all (defence-in-depth after the Pydantic GateTask validator); `soup can run` requires `--yes` or explicit consent callback and raises `ValueError` (not `PermissionError`, which is an `OSError` subclass that broad `except` blocks would swallow); GGUF `rglob` result for ollama deploy is `realpath+commonpath` checked against extract_dir (prevents symlink escape from a crafted can); `DeployTarget.path` validator normalises mixed `\\`/`/` separators before splitting (closes a Windows `..` bypass); `CAN_FORMAT_VERSION` 1→2 (additive — v1 still loads); `soup can publish` validates `repo_id` via `utils/hf.validate_repo_id`, resolves token via `resolve_token`, sanitises commit messages (first-line, 200-char cap), uses HTTPS-only HfApi; `_write_spike_recovery_hint` adds `is_under_cwd` containment check on `args.output_dir` from raw HF `TrainingArguments`; `lookup_entry_by_output_dir` emits `ResourceWarning` when 1000-row scan limit is hit (no silent miss); `CrossDocCollator` no longer mutates input feature dicts (HF Dataset rows are cached and reused — mutation broke subsequent batches); `Candidate` rejects `bool` in `score`/`latency_ms` (was sneaking past `int` isinstance check); `evaluate_candidate` latency mean now divides by *completed* prompts (excludes crashed) so a broken candidate isn't 
artificially fast; `auto_quant.run_auto_quant_picker` soft-falls-back to highest-scored candidate when no candidate clears `min_score` (server still binds); `build_logits_processors` returns `[]` when neither `outlines` nor `lm-format-enforcer` is installed (server degrades to free-form rather than 500); MII server uses loopback-only CORS, max_tokens cap [1, 16384], stream rejection, generic 500 with no stack-trace leak; `os.execvp` auto-reexec uses list args (no shell), all forwarded flags pre-validated; `cleanup_extract_dir` uses `os.path.commonpath` (Windows-safe) instead of `startswith`; `_run_subprocess` catches `TimeoutExpired` and returns rc=124 (coreutils convention) instead of an unhandled traceback; new `eval_results` and `tensorrt` artifact kinds in `RegistryStore._VALID_KINDS`
153
+ - **v0.53.4 — Long Context + Architecture**: six closes covering LongLoRA hardening, LLaMA Pro live wiring, and a CUDA-OOM-hint UX upgrade. (#11 OOM hint) `format_friendly_error` upgrades the CUDA-OOM and `OutOfMemoryError` patterns to point users at the explicit `--batch-size <half>` / `--grad-accum <double>` CLI flags before the legacy `quantization: 4bit` fallback — closes #11 with no functional change to the security surface. (#122 FlashAttention v3 incompatibility) New `soup_cli/utils/flash_attn.is_flash_attn_v3_available() -> bool` is a defensive probe (never raises, False on missing `flash_attn` / non-string `__version__` / unparseable / major < 3). `validate_longlora_compat` calls it AFTER the existing task / backend / architecture / ring-attention checks so the FA-v3 error only surfaces on otherwise-valid LongLoRA configs (avoids spurious confusion on unrelated misconfig). The check is loaded via a function-scoped import to keep `validate_longlora_compat` import-cheap and avoid CUDA-side effects at config load time on machines without `flash_attn` installed. (#120 LongLoRA arch allowlist) `soup_cli/utils/longlora.py` ships three new word-boundary regex helpers (`is_mistral_model`, `is_qwen_model`, `is_phi_model`) — same regex policy as v0.39.0 `is_gemma4_model` (rejects substring matches like `"my-mistralish-finetune"` or `"unmistral-7b"`). Shared `_check_model_name` input guard rejects `bool` BEFORE the `isinstance(str)` check (because bool is a subclass of int and would otherwise fall through silently — matches v0.53.3 `is_known_vlm_base` policy), rejects null bytes via explicit substring check, and returns `None` (→ helper returns False) for inputs >512 chars (avoids ReDoS-style overhead on adversarial input). New `is_supported_longlora_arch(model_name: object) -> bool` is the union accessor with defensive non-string surface (returns False rather than propagating TypeError, matches v0.53.3 / v0.52.0 model-detection policy). 
`validate_longlora_compat` also gained per-call null-byte rejection + bool/non-string TypeError on `task` and `backend` (matches v0.50.0 `validate_long_context_grpo_compat`); new `_truncate_for_message(value, limit=64)` helper bounds the `base` echo in error messages (security-review MEDIUM fix mirroring v0.53.3 `validate_vision_grpo_compat` redaction — defends against adversarial / long bases bloating stderr + log files). Mixtral is INTENTIONALLY excluded from the allowlist — regex matches `mistral` as a word-boundary token, NOT `mixtral`; documented in the docstring so a future contributor adding Mixtral support adds it explicitly. (#121 Llama 3.1 RoPE auto-detect) `apply_long_context_config` extended with `rope_scaling_type=None` auto-detect path — reads `model_config.rope_scaling` and runs `detect_llama3_rope_in_config` (v0.49.0 Part D helper) on it. If the existing block declares `llama3` (either via the legacy `type` key OR the newer `rope_type` alias), the auto-detect picks `"llama3"` + the upstream `LLAMA3_DEFAULT_*` constants; otherwise falls back to `"dynamic"`. Explicit caller pick still wins (any non-None value). Back-compat preserved by keeping the legacy default kwarg `rope_scaling_type="dynamic"`. The detect helper rejects non-Mapping config input via `TypeError` (no SSRF / file-read risk — the function is pure-Python data inspection). (#83 LLaMA Pro live block expansion) `soup_cli/utils/block_expansion.expand_model_blocks` replaces the v0.41.0 Part C `NotImplementedError` stub with a real implementation: clones the last `min(num_new_blocks, original_count)` decoder blocks via `copy.deepcopy` (full independent storage — no shared buffers), zero-inits each clone's residual projections (`mlp.down_proj.weight + bias` and `self_attn.o_proj.weight + bias`) so the appended block initially acts as identity per the LLaMA Pro paper §3.1, appends to `model.model.layers`, and updates `model.config.num_hidden_layers`. 
Validates `num_new_blocks` via `validate_expand_layers` (bool-guard + `[1, 64]`) BEFORE any model mutation. `_get_layers_module` uses explicit `is None` check (not falsy shortcut) to defend against `nn.Module.__bool__` overrides on subclasses (code-review HIGH fix). `_zero_init_block_residual` returns `bool` and the caller emits `warnings.warn` when neither standard projection path matches the cloned block (non-Llama-shaped arch — security-review LOW fix surfaces silent-degradation to operators training on Falcon-style models). Over-expansion silently clamps to `min(n, original_count)` rather than raising — matches the project's defensive-fallback policy for advisory operations. New `apply_llama_pro_freeze(model, num_new_blocks) -> int` is the canonical "train only new blocks" companion (global `requires_grad=False` pass, then unfreeze the tail N blocks; returns trainable parameter count). New shared helper `apply_block_expansion_if_configured(model, tcfg, console)` centralises the "if `expand_layers` is set, expand + optionally freeze + print" sequence — used identically by SFT and Pretrain trainers (matches v0.40.6 `peft_wiring` centralisation policy; defends against drift between trainer call sites which would otherwise produce subtle inconsistent behaviour). (#74 HF push surface QA) Manual QA of `soup push`, `soup train --push-as`, `soup data push`, `soup deploy hf-space` deferred to a contributor with private HF credentials — entry recorded in `tests/qa/v053_qa.md` with the full test plan + acceptance criteria. The HF push security surface (repo_id validation, token resolution, commit message sanitization, model card injection defence, Space template containment) is unchanged from v0.29.0 / v0.40.2 and remains covered by `test_hf_integration.py` + `test_v0402_part_a.py`. Test surface: 1 new test file (`tests/test_v0534.py`) carrying 49 new tests + 7 net updates to v0.49.0 / v0.41.0 / v0.10.x regression tests. 
Known limitations: (1) LongLoRA S² forward override still deferred to v0.49.1 — schema gate hardened, live monkeypatch is the next deliverable. (2) Mixtral excluded from LongLoRA allowlist (MoE attention forward signature differs). (3) Block-expansion zero-init covers Llama-shaped blocks only — non-standard arches still get appended + trainable, but lose the LLaMA Pro identity-init guarantee (and emit a runtime warning). (4) Llama 3.1 RoPE auto-detect only fires when caller passes `rope_scaling_type=None` (explicit pick wins). (5) #74 live QA against a private HF repo is the v0.53.5+ follow-up. (v0.53.4)
154
+ - **v0.53.3 — GRPO Plus partial wiring (#128 grpo_fp16, #129 vision-VLM probe)**: lifts two surgical v0.50.0 GRPO Plus deferred stubs while keeping the project's hardening invariants; the four larger items (#127 stability callback, #123 6 GRPO variant loss kernels, #126 PRMTrainerWrapper, #68 multi-objective preference live combine) are scope-deferred to v0.53.4. (#128 grpo_fp16 routing) New `_validate_grpo_fp16_amp_exclusive` SoupConfig cross-validator rejects the silent-mutex combo `grpo_fp16=True + auto_mixed_precision=True` at config load — both flags pick the mixed-precision dtype via different codepaths; combining them is a footgun where downstream behaviour depends on validator execution order. Cross-validator short-circuits when `task != 'grpo'` so the v0.50.0 stability task-gate diagnosis fires first (keeps the most actionable error at the front; code-review HIGH fix). New `GRPOTrainerWrapper._build_precision_kwargs(self) -> dict[str, bool]` returns the `{fp16, bf16}` HF kwargs per `(device, grpo_fp16)` matrix: non-CUDA (CPU / MPS / XPU) → both False (HF Trainer's fp16/bf16 kwargs are CUDA-specific, MPS / XPU use their own mixed-precision paths), CUDA + `grpo_fp16=True` → `fp16=True, bf16=False` (unsloth parity), default CUDA → `fp16=False, bf16=True` (legacy v0.50.0 path). Direct attribute access on `self.config.training.grpo_fp16` (no `getattr` fallback — Pydantic-guaranteed field). (#129 vision-GRPO base probe) New `soup_cli/utils/prm.KNOWN_VLM_REGEX` compiled regex with 10 word-boundary alternatives covering Qwen2-VL / Qwen2.5-VL / QVQ / Pixtral / InternVL / InternVL2_5 / InternVL3 / Llama-3.2-Vision (any size via `[a-z0-9._-]*vision` glob) / LLaVA / MiniCPM-V / Idefics / ShareGPT4V / Fuyu. Word-boundary idiom `(?:^|[^a-z0-9])…(?:[^a-z0-9]|$)` mirrors v0.39.0 `is_gemma4_model` / v0.44.0 `is_llama4_model` / v0.49.0 `is_llama_model` policy — rejects substring noise like `"my-pixtralish"`. 
New `is_known_vlm_base(name: object) -> bool` is defensive — returns False (never raises) on non-string / bool / empty / null-byte / `>_MAX_BASE_NAME_LEN=512`. Extended `validate_vision_grpo_compat` with optional `base: str | None = None` kwarg — `None` / empty-string skips the probe (back-compat for legacy v0.50.0 Part E callers); non-empty-non-VLM raises `ValueError` with friendly message naming the expected families (Qwen2-VL / Pixtral / InternVL / Llama-3.2-Vision / LLaVA / MiniCPM-V). Error message **truncates the echoed `base` to 64 chars** before serialisation (security-review MEDIUM fix mirroring v0.34.0 `crash.py` `output_dir` basename policy — defends against adversarial / long bases bloating error logs and from leaking unredacted user input into operator-facing tracebacks). `_validate_vision_grpo` in SoupConfig threads `base=self.base` so a YAML pairing `vision_grpo: true` with a non-VLM checkpoint is rejected at schema-load instead of surfacing as a cryptic `"module has no attribute 'vision_tower'"` runtime error. Test surface: 1 new test file (`test_v0533.py`) carrying 37 new tests covering: every `_build_precision_kwargs` matrix cell (CUDA + grpo_fp16 / default CUDA / CPU / MPS), every cross-validator branch (mutex rejection / task-gate priority / both-off pass), every regex alternative (Qwen2-VL / Pixtral / QVQ / Llama-3.2-Vision variants / negative matches), every defensive guard (bool / non-string / null-byte / 512-byte boundary), error-message truncation (security-review M regression), and end-to-end YAML load (happy + reject). Known limitations: (1) Scope-deferred — 4 larger v0.53.3 items moved to v0.53.4 because each requires deep TRL subclassing and warrants its own focused release; the v0.40.x stub-then-live cadence shipped 5 patch releases over 6 weeks, mirroring that here. (2) VLM allowlist is static name-regex only; a legitimate VLM published under an org whose checkpoint name lacks any of those tokens (e.g. 
a custom internal fork) is rejected at schema-load and operators must omit `vision_grpo: true` until a future release adds a runtime `model.config.vision_config` probe. (3) `_build_precision_kwargs` is GRPO-only — other RL trainers (PPO / RewardModel) follow their existing mixed-precision conventions. (v0.53.3)
155
+
151
156
  - **v0.53.2 — Modality II live trainers**: lifts four v0.52.0 deferred stubs (#137, #135, #133, #132) into real trainer wrappers while keeping the project's hardening invariants. (#137 reasoning_effort + train_on_eot) `apply_reasoning_effort_prefix` follows v0.41.0 / v0.51.0 validator policy (bool-first, null-byte / empty / oversize / case-insensitive normalisation); messages list is treated as immutable (returns a new list — matches v0.33.0 #47 `CrossDocCollator` policy). `build_assistant_only_labels(train_on_eot=True)` reuses the existing v0.36.0 mask infrastructure — same null-byte / max_length / bool guards. (#135 EBFT / GDPO) `apply_ebft_loss` and `apply_gdpo_loss` enforce **finite-only inputs** (`torch.isfinite` guard on tensor inputs + `math.isfinite` on scalar params) — NaN / Inf would silently corrupt training otherwise. `dpo_margin` defaults to `None` (not `0.0`) per security-review M3 fix: silent zeroing in the `margin` variant when the operator forgot to set the margin would have looked like training success but produced a meaningless gradient. Both attach hooks (`attach_ebft_compute_loss`, `attach_gdpo_compute_loss`) are **idempotent** via a marker attribute on the wrapped method — re-attach is a no-op and a dedicated test class verifies the invariant (code-review M2 fix). (#133 DistillTrainerWrapper) **Separate trust_remote_code resolution for student and teacher** (security-review L2 fix): `model_requires_trust_remote_code(teacher)` runs independently of the student probe, otherwise a malicious teacher could piggy-back on the student's opt-in. Teacher is loaded with `device_map="cpu" if device == "cpu" else "auto"`, frozen via `requires_grad_(False)` + `.eval()` immediately after load — never participates in gradient computation. 
`_DistillTrainer.compute_loss` device-bridge: `teacher_device = next(teacher_ref.parameters()).device`, `teacher_inputs.to(teacher_device)` before teacher forward, `teacher_logits.to(student_logits.device)` before KL kernel — defends against HF Trainer's auto-CUDA promotion silently producing cross-device `index_select` crashes. **DataCollator correctness fix** (surfaced during Wave 3 CPU smoke): `DataCollatorForLanguageModeling` does NOT pad pre-tokenised `labels` — switched to `DataCollatorForSeq2Seq(label_pad_token_id=-100, padding=True)` so variable-length loss-masked rows batch correctly without runtime crash. (#132 ClassifierTrainerWrapper) `_normalise_label` caps multi-label entries at **1024 per row** (matches v0.52.0 schema cap; security-review HIGH fix — unbounded would allow OOM via crafted JSONL), dedups via set conversion, validates `label_names` map entries reject null bytes + empty strings. `problem_type` is set explicitly from `tcfg.classifier_kind` (not silently inferred from labels) so a multi-label-shaped row in a single-label config raises rather than mis-trains. Training Setup Panel renders `Head: num_labels=N, kind=...` for classifier-family tasks instead of meaningless LoRA r/alpha lines (code-review L3 cosmetic fix — Panel no longer mis-represents what the wrapper is doing). (Cross-cutting) `commands/train.py` task routing branches added for `distill` and `classifier` / `reranker` / `cross_encoder` — source-grep regression guards in the test suite use the **full instantiation expression** `DistillTrainerWrapper(cfg, **trainer_kwargs)` so comment-only mentions of the class name cannot satisfy the regression check (TDD-review hardening). Both new factories (`build_distill_trainer`, `build_classifier_trainer`) reject unknown kwargs via Python signature contract — dedicated `pytest.raises(TypeError)` tests cover the path (TDD-review L1 fix). Test surface: 1 new test file (`test_v0532.py`) carrying 120 new tests across 14 classes. 
Known limitations: (1) `#71` TinyLlama-1.1B-LoRA full ONNX export is host-RAM-bound (≥16 GB free RAM needed for the `onnx.load(load_external_data=True)` post-process step); tiny-gpt2 smoke proves pipeline integrity — recorded in `tests/qa/v053_qa.md`. (2) Distillation supports same-tokenizer pairs only — cross-tokenizer (Llama → Qwen) needs a projection or sequence-level loss, out of scope. (3) Classifier wrapper has no LoRA path — full head + base training; LoRA classifier finetuning is a follow-up. (4) EBFT / GDPO auto-attach only fires when the corresponding `*_variant` field is set; manual `attach_*` invocation from custom training loops is supported and idempotent. (5) `reasoning_effort` injection happens at data-prep time inside `build_format_row`; changing the level between runs requires re-rendering the dataset. (v0.53.2)
152
157
 
153
158
  - **v0.53.1 — Quant Menu II + Export pipeline live**: lifts six v0.53.0 deferred stubs to live wiring while keeping the project's hardening invariants. New shared helper `soup_cli/utils/paths.enforce_under_cwd_and_no_symlink` consolidates the v0.33.0 #22 TOCTOU pattern (cwd containment via `os.path.realpath + os.path.commonpath` + `os.lstat + S_ISLNK` rejection) — used by `commands/merge.py`, `commands/export.py`, `utils/save_formats.py`, and `utils/gguf_quant.py` so the same boundary check fires at every CLI dispatch point. `merge_4bit` and `export_torchao` (`utils/save_formats.py`): cwd containment + symlink rejection on `merged_dir` / `model_dir` / `output_dir`; `load_quant_config` enforces `yaml.safe_load` only + 256 KB cap + extension allowlist (`.yaml`/`.yml`); **per-scheme closed kwarg allowlist** rejects dunder keys + unknown params before the splat into `torchao.<scheme>Config(**kwargs)` (security-review HIGH fix — `Int4WeightOnly` accepts `{group_size, inner_k_tiles}`, `NVFP4` accepts nothing extra). Corrected BNB-4bit skip-modules kwarg name from `llm_int8_skip_modules` to `bnb_4bit_skip_modules`. `export_advanced_gguf` (`utils/gguf_quant.py`): all three subprocess invocations (`convert_hf_to_gguf.py`, `llama-imatrix`, `llama-quantize`) use argv-list form with no shell, 30-min timeout, `sys.executable` for the convert script; `_run_convert_to_f16` realpath-verifies that `convert_hf_to_gguf.py` stays inside the `llama_cpp_dir` after resolution (security-review HIGH M5 fix — defends against a symlinked script escape). `_prepare_calibration_text` strips null bytes, collapses newlines to spaces, caps per-line at 8 KB + total at 50 MB (security-review M1), uses POSIX `O_NOFOLLOW` to refuse symlinks at the kernel level (security-review M3 — closes the TOCTOU window between the dispatch-time check and the actual `open()`); requires ≥ 1 usable row before invoking imatrix. 
`_safe_stderr` Rich-markup-escapes subprocess stderr before embedding in `RuntimeError` (security-review L4) so a crafted llama.cpp error cannot inject `[red]...[/]` into the operator-facing panel. UD-prefix stripped from flavour arg before passing to llama-quantize (`UD-Q4_K_XL` → `Q4_K_XL`). Calibration data path containment + symlink rejection fires at CLI dispatch in `commands/export.py::_export_gguf_advanced`. `detect_prequantized_format_from_path` (`autopilot/decisions.py`): cwd containment + `os.lstat + S_ISLNK` on `<model_dir>/config.json` (security-review HIGH H2 — out-of-cwd model paths silently return `None` to preserve soft-probe semantics so HF Hub repo IDs aren't rejected); null-byte rejection on `model_dir`. `commands/merge.py`: early `is_under_cwd(output)` check at CLI boundary (security-review M4) — consistent with the v0.20.0 / v0.40.2 containment-at-the-boundary policy. `deploy_measure.py`: cache file written atomically via `tempfile.mkstemp` + `os.replace` with `os.lstat + S_ISLNK` rejection on BOTH `load_cache` and `save_cache` (security-review M2 — was missing on the load side); env override `SOUP_DEPLOY_AUTOPILOT_CACHE` rejects null bytes + control chars before any path resolution and confines the override to home / cwd / tempdir; cache file gets best-effort 0o600 perms on POSIX (matches v0.26.0 registry.db policy); 1 MB cache-file cap. `_DEPLOY_MEASURE_BEFORE_GEN` / `_AFTER_FACTORY` module-level callables are documented as a non-public escape hatch (deferred until v0.46.1 live model-loader). Test surface: 4 new test files (`test_v0531_82.py` / `test_v0531_109.py` / `test_v0531_139.py` / `test_v0531_142.py`) carrying 112 new tests covering happy paths + failure modes + every security guard (POSIX symlink rejection, per-scheme kwarg allowlist, TOCTOU defences, `_MAX_CANDIDATES` cap, MINOR-verdict band, mxfp4 word boundary, BNB-alias detection, render-table markup escape). 
Known limitations: (1) `_DEPLOY_MEASURE_BEFORE_GEN` / `_AFTER_FACTORY` are a stop-gap until v0.46.1 ships first-party transformers / vLLM generator factories. (2) `#70` GGUF and `#72` AWQ/GPTQ manual QA smokes remain pending — require CUDA + llama.cpp build; recipes scripted in `tests/qa/v053_qa.md`. (3) BNB-4bit merge + TorchAO PTQ live happy-path is mock-covered only — CPU-only CI cannot execute the real BNB / torchao kernels. (4) `_prepare_calibration_text` accepts JSONL with `text` / `prompt` / `content` aliases + raw text fallback; other formats (parquet / markdown) are out of scope. (5) Cache key truncates `base_sha` to 16 hex chars at the call site (collision probability ≈ 1-in-2³² across ~4 billion entries). (6) Pre-quantized detection is heuristic — name regex + local `config.json` probe; HF Hub repo IDs without local download fall back to name-only matching. (7) `enforce_under_cwd_and_no_symlink` checks only the leaf path; deeper traversal relies on the per-file leaf check at each site. (v0.53.1)
@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
4
4
 
5
5
  [project]
6
6
  name = "soup-cli"
7
- version = "0.53.2"
7
+ version = "0.53.4"
8
8
  description = "Fine-tune LLMs in one command. No SSH, no config hell."
9
9
  readme = "README.md"
10
10
  license = "Apache-2.0"
@@ -1,3 +1,3 @@
1
1
  """Soup CLI — Fine-tune LLMs in one command."""
2
2
 
3
- __version__ = "0.53.2"
3
+ __version__ = "0.53.4"
@@ -2512,6 +2512,7 @@ class SoupConfig(BaseModel):
2512
2512
  task=self.task,
2513
2513
  modality=self.modality,
2514
2514
  backend=self.backend,
2515
+ base=self.base, # v0.53.3 #129 — name-regex VLM probe
2515
2516
  )
2516
2517
  except ValueError as exc:
2517
2518
  raise ValueError(str(exc)) from exc
@@ -2557,6 +2558,35 @@ class SoupConfig(BaseModel):
2557
2558
  )
2558
2559
  return self
2559
2560
 
2561
+ @model_validator(mode="after")
2562
+ def _validate_grpo_fp16_amp_exclusive(self) -> "SoupConfig":
2563
+ """v0.53.3 #128 — ``grpo_fp16`` and ``auto_mixed_precision`` are
2564
+ mutually exclusive.
2565
+
2566
+ Both flags pick the mixed-precision dtype but go through different
2567
+ codepaths (``grpo_fp16`` forces ``fp16=True, bf16=False`` on
2568
+ GRPOConfig directly; ``auto_mixed_precision`` runs the v0.32.0
2569
+ per-model + per-GPU picker). Combining them is a footgun where the
2570
+ downstream behaviour depends on order-of-evaluation — fail fast at
2571
+ config-load with a friendly message naming both flags so the user
2572
+ picks one.
2573
+ """
2574
+ # Short-circuit when task is not 'grpo' so the v0.50.0 stability
2575
+ # task-gate error fires first (code-review HIGH fix — keeps a
2576
+ # consistent "wrong-task" diagnosis ahead of the mutual-exclusion
2577
+ # one, regardless of validator execution order).
2578
+ if self.task != "grpo":
2579
+ return self
2580
+ if self.training.grpo_fp16 and self.training.auto_mixed_precision:
2581
+ raise ValueError(
2582
+ "grpo_fp16=True and auto_mixed_precision=True are mutually "
2583
+ "exclusive — both pick the mixed-precision dtype but go "
2584
+ "through different codepaths. Pick one: grpo_fp16 forces "
2585
+ "FP16 (unsloth parity), auto_mixed_precision uses the "
2586
+ "v0.32.0 per-GPU picker."
2587
+ )
2588
+ return self
2589
+
2560
2590
  @model_validator(mode="after")
2561
2591
  def _validate_hub_supported(self) -> "SoupConfig":
2562
2592
  """v0.51.0 Part E — ``hub`` other than ``hf`` requires a non-mlx
@@ -54,6 +54,32 @@ class GRPOTrainerWrapper:
54
54
  self.tokenizer = None
55
55
  self.trainer = None
56
56
 
57
+ def _build_precision_kwargs(self) -> dict[str, bool]:
58
+ """Resolve fp16/bf16 kwargs for GRPOConfig (v0.53.3 #128).
59
+
60
+ Priority:
61
+ - Non-CUDA device (CPU / MPS / XPU) → no mixed precision (both
62
+ False). HF Trainer's fp16/bf16 kwargs are CUDA-specific; non-CUDA
63
+ backends must use their own mixed-precision path (MPS Metal,
64
+ XPU IPEX). Documented explicitly so future MPS work doesn't
65
+ regress this branch silently.
66
+ - ``grpo_fp16=True`` (CUDA) → ``fp16=True, bf16=False`` (unsloth
67
+ parity).
68
+ - Default CUDA → ``fp16=False, bf16=True`` (legacy v0.50.0 path).
69
+
70
+ ``auto_mixed_precision`` is mutually exclusive with ``grpo_fp16``
71
+ (rejected at schema load via ``_validate_grpo_fp16_amp_exclusive``);
72
+ when only ``auto_mixed_precision`` is set, the v0.32.0 picker runs
73
+ elsewhere in the training loop and overrides this default.
74
+ """
75
+ if self.device != "cuda":
76
+ return {"fp16": False, "bf16": False}
77
+ # grpo_fp16 is a Pydantic field with default=False; direct attribute
78
+ # access (no getattr fallback) so a typo would fail loudly.
79
+ if self.config.training.grpo_fp16:
80
+ return {"fp16": True, "bf16": False}
81
+ return {"fp16": False, "bf16": True}
82
+
57
83
  def setup(self, dataset: dict):
58
84
  """Load model, tokenizer, apply LoRA, create GRPO trainer."""
59
85
  from datasets import Dataset
@@ -166,7 +192,7 @@ class GRPOTrainerWrapper:
166
192
  "logging_steps": tcfg.logging_steps,
167
193
  "save_steps": tcfg.save_steps,
168
194
  "save_total_limit": 3,
169
- "bf16": self.device == "cuda",
195
+ **self._build_precision_kwargs(),
170
196
  "report_to": self.report_to,
171
197
  "remove_unused_columns": False,
172
198
  "deepspeed": self.deepspeed_config,
@@ -264,6 +264,13 @@ class PretrainTrainerWrapper:
264
264
  if tcfg.quantization in ("4bit", "8bit", "mxfp4"):
265
265
  self.model = prepare_model_for_kbit_training(self.model)
266
266
 
267
+ # v0.53.4 #83 — LLaMA Pro block expansion (centralised — see SFT).
268
+ from soup_cli.utils.block_expansion import (
269
+ apply_block_expansion_if_configured,
270
+ )
271
+
272
+ apply_block_expansion_if_configured(self.model, tcfg, console)
273
+
267
274
  # LoRA — with MoE-aware target modules if moe_lora is enabled
268
275
  target_modules = tcfg.lora.target_modules
269
276
  if target_modules == "auto":
@@ -491,6 +491,17 @@ class SFTTrainerWrapper:
491
491
  f"[green]Freeze training:[/] {frozen} parameters frozen"
492
492
  )
493
493
 
494
+ # v0.53.4 #83 — LLaMA Pro block expansion. Run BEFORE LoRA so PEFT's
495
+ # target-module matcher sees the new blocks. Centralised in
496
+ # ``block_expansion.apply_block_expansion_if_configured`` to avoid
497
+ # drift between SFT and Pretrain trainers (matches v0.40.6 peft_wiring
498
+ # centralisation policy).
499
+ from soup_cli.utils.block_expansion import (
500
+ apply_block_expansion_if_configured,
501
+ )
502
+
503
+ apply_block_expansion_if_configured(self.model, tcfg, console)
504
+
494
505
  # LoRA — with MoE-aware target modules if moe_lora is enabled
495
506
  target_modules = tcfg.lora.target_modules
496
507
  if target_modules == "auto":