soup-cli 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60) hide show
  1. soup_cli-0.1.0/.claude/settings.json +27 -0
  2. soup_cli-0.1.0/.github/workflows/ci.yml +31 -0
  3. soup_cli-0.1.0/.gitignore +39 -0
  4. soup_cli-0.1.0/CLAUDE.md +97 -0
  5. soup_cli-0.1.0/LICENSE +21 -0
  6. soup_cli-0.1.0/PKG-INFO +346 -0
  7. soup_cli-0.1.0/README.md +302 -0
  8. soup_cli-0.1.0/pyproject.toml +64 -0
  9. soup_cli-0.1.0/soup.png +0 -0
  10. soup_cli-0.1.0/soup_cli/__init__.py +3 -0
  11. soup_cli-0.1.0/soup_cli/__main__.py +5 -0
  12. soup_cli-0.1.0/soup_cli/cli.py +40 -0
  13. soup_cli-0.1.0/soup_cli/commands/__init__.py +0 -0
  14. soup_cli-0.1.0/soup_cli/commands/chat.py +251 -0
  15. soup_cli-0.1.0/soup_cli/commands/data.py +371 -0
  16. soup_cli-0.1.0/soup_cli/commands/eval.py +197 -0
  17. soup_cli-0.1.0/soup_cli/commands/init.py +97 -0
  18. soup_cli-0.1.0/soup_cli/commands/push.py +234 -0
  19. soup_cli-0.1.0/soup_cli/commands/runs.py +325 -0
  20. soup_cli-0.1.0/soup_cli/commands/train.py +135 -0
  21. soup_cli-0.1.0/soup_cli/config/__init__.py +0 -0
  22. soup_cli-0.1.0/soup_cli/config/loader.py +31 -0
  23. soup_cli-0.1.0/soup_cli/config/schema.py +141 -0
  24. soup_cli-0.1.0/soup_cli/data/__init__.py +0 -0
  25. soup_cli-0.1.0/soup_cli/data/formats.py +157 -0
  26. soup_cli-0.1.0/soup_cli/data/loader.py +146 -0
  27. soup_cli-0.1.0/soup_cli/data/validator.py +148 -0
  28. soup_cli-0.1.0/soup_cli/experiment/__init__.py +0 -0
  29. soup_cli-0.1.0/soup_cli/experiment/tracker.py +274 -0
  30. soup_cli-0.1.0/soup_cli/monitoring/__init__.py +0 -0
  31. soup_cli-0.1.0/soup_cli/monitoring/callback.py +89 -0
  32. soup_cli-0.1.0/soup_cli/monitoring/display.py +85 -0
  33. soup_cli-0.1.0/soup_cli/trainer/__init__.py +0 -0
  34. soup_cli-0.1.0/soup_cli/trainer/dpo.py +202 -0
  35. soup_cli-0.1.0/soup_cli/trainer/sft.py +209 -0
  36. soup_cli-0.1.0/soup_cli/utils/__init__.py +0 -0
  37. soup_cli-0.1.0/soup_cli/utils/constants.py +17 -0
  38. soup_cli-0.1.0/soup_cli/utils/gpu.py +115 -0
  39. soup_cli-0.1.0/templates/chat.yaml +24 -0
  40. soup_cli-0.1.0/templates/code.yaml +24 -0
  41. soup_cli-0.1.0/templates/medical.yaml +25 -0
  42. soup_cli-0.1.0/tests/__init__.py +0 -0
  43. soup_cli-0.1.0/tests/conftest.py +67 -0
  44. soup_cli-0.1.0/tests/test_callback.py +100 -0
  45. soup_cli-0.1.0/tests/test_chat.py +76 -0
  46. soup_cli-0.1.0/tests/test_cli.py +70 -0
  47. soup_cli-0.1.0/tests/test_config.py +75 -0
  48. soup_cli-0.1.0/tests/test_data.py +85 -0
  49. soup_cli-0.1.0/tests/test_data_tools.py +229 -0
  50. soup_cli-0.1.0/tests/test_display.py +99 -0
  51. soup_cli-0.1.0/tests/test_eval.py +22 -0
  52. soup_cli-0.1.0/tests/test_formats.py +174 -0
  53. soup_cli-0.1.0/tests/test_gpu.py +24 -0
  54. soup_cli-0.1.0/tests/test_init.py +91 -0
  55. soup_cli-0.1.0/tests/test_loader.py +87 -0
  56. soup_cli-0.1.0/tests/test_push.py +126 -0
  57. soup_cli-0.1.0/tests/test_runs.py +121 -0
  58. soup_cli-0.1.0/tests/test_smoke_train.py +211 -0
  59. soup_cli-0.1.0/tests/test_tracker.py +241 -0
  60. soup_cli-0.1.0/tests/test_validator.py +139 -0
@@ -0,0 +1,27 @@
1
+ {
2
+ "permissions": {
3
+ "allow": [
4
+ "Bash(git status*)",
5
+ "Bash(git diff*)",
6
+ "Bash(git log*)",
7
+ "Bash(git add*)",
8
+ "Bash(git commit*)",
9
+ "Bash(git push*)",
10
+ "Bash(git branch*)",
11
+ "Bash(git checkout*)",
12
+ "Bash(git stash*)",
13
+ "Bash(git remote*)",
14
+ "Bash(ruff check*)",
15
+ "Bash(ruff format*)",
16
+ "Bash(python -m ruff*)",
17
+ "Bash(pytest*)",
18
+ "Bash(python -m pytest*)",
19
+ "Bash(pip install*)",
20
+ "Bash(pip list*)",
21
+ "Bash(soup *)",
22
+ "Bash(python -m soup_cli*)",
23
+ "Bash(cd /c/Users/tokmo/peder/Soup && python -m pytest*)",
24
+ "Bash(cd /c/Users/tokmo/peder/Soup && ruff check*)"
25
+ ]
26
+ }
27
+ }
@@ -0,0 +1,31 @@
1
+ name: CI
2
+
3
+ on:
4
+ push:
5
+ branches: [main]
6
+ pull_request:
7
+ branches: [main]
8
+
9
+ jobs:
10
+ lint:
11
+ runs-on: ubuntu-latest
12
+ steps:
13
+ - uses: actions/checkout@v4
14
+ - uses: actions/setup-python@v5
15
+ with:
16
+ python-version: "3.11"
17
+ - run: pip install ruff
18
+ - run: ruff check soup_cli/ tests/
19
+
20
+ test:
21
+ runs-on: ubuntu-latest
22
+ strategy:
23
+ matrix:
24
+ python-version: ["3.9", "3.11", "3.12"]
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ - uses: actions/setup-python@v5
28
+ with:
29
+ python-version: ${{ matrix.python-version }}
30
+ - run: pip install -e ".[dev]"
31
+ - run: pytest tests/ -v --tb=short
@@ -0,0 +1,39 @@
1
+ __pycache__/
2
+ *.py[cod]
3
+ *$py.class
4
+ *.egg-info/
5
+ dist/
6
+ build/
7
+ *.egg
8
+ .eggs/
9
+
10
+ # Virtual env
11
+ .venv/
12
+ venv/
13
+ env/
14
+
15
+ # IDE
16
+ .vscode/
17
+ .idea/
18
+ *.swp
19
+ *.swo
20
+
21
+ # OS
22
+ .DS_Store
23
+ Thumbs.db
24
+
25
+ # Training outputs
26
+ output/
27
+ checkpoints/
28
+ wandb/
29
+
30
+ # Soup local data
31
+ .soup/
32
+ *.db
33
+
34
+ # Secrets
35
+ .env
36
+ *.key
37
+
38
+ # Internal plan (not for repo)
39
+ plan.md
@@ -0,0 +1,97 @@
1
+ # CLAUDE.md
2
+
3
+ This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
4
+
5
+ ## Build & Development Commands
6
+
7
+ ```bash
8
+ # Install in dev mode (editable + test deps)
9
+ pip install -e ".[dev]"
10
+
11
+ # Run all tests
12
+ pytest tests/ -v --tb=short
13
+
14
+ # Run a single test file
15
+ pytest tests/test_config.py -v
16
+
17
+ # Run a single test
18
+ pytest tests/test_data.py::test_detect_alpaca_format -v
19
+
20
+ # Lint
21
+ ruff check soup_cli/ tests/
22
+
23
+ # Lint with auto-fix
24
+ ruff check --fix soup_cli/ tests/
25
+ ```
26
+
27
+ ## Architecture
28
+
29
+ Soup is a CLI-first tool for LLM fine-tuning. The core flow:
30
+
31
+ ```
32
+ soup train --config soup.yaml
33
+ → config/loader.py (YAML → Pydantic SoupConfig)
34
+ → utils/gpu.py (detect CUDA/MPS/CPU, estimate batch size)
35
+ → data/loader.py (load file or HF dataset → normalize format)
36
+ → trainer/sft.py (load model → quantize → apply LoRA → train)
37
+ → monitoring/callback.py + display.py (live Rich dashboard)
38
+ → experiment/tracker.py (log run + metrics to SQLite)
39
+ → save LoRA adapter to output/
40
+ ```
41
+
42
+ **Config system:** `config/schema.py` is the single source of truth. All YAML fields are validated by Pydantic models (`SoupConfig` → `TrainingConfig` → `LoraConfig`, `DataConfig`). The built-in templates (chat/code/medical) also live as YAML strings in `config/schema.py`.
43
+
44
+ **Data pipeline:** `data/loader.py` handles local files (JSONL/JSON/CSV/Parquet) and HuggingFace datasets. `data/formats.py` auto-detects and normalizes alpaca/sharegpt/chatml formats into a unified `{"messages": [...]}` structure; it also supports reverse conversion via `messages_to_format()`.
45
+
46
+ **Trainer:** `trainer/sft.py` (`SFTTrainerWrapper`) and `trainer/dpo.py` (`DPOTrainerWrapper`) wrap HuggingFace's SFTTrainer/DPOTrainer with auto quantization (BitsAndBytes), LoRA (PEFT), and batch size estimation. Heavy ML imports are lazy (inside methods) so CLI stays fast for non-training commands.
47
+
48
+ **Monitoring:** `monitoring/callback.py` is a HuggingFace `TrainerCallback` that streams metrics to `monitoring/display.py` (Rich Live panel at 2Hz) and optionally to the experiment tracker.
49
+
50
+ **Experiment tracking:** `experiment/tracker.py` (`ExperimentTracker`) stores runs, per-step metrics, and eval results in SQLite at `~/.soup/experiments.db`. Tracking is automatically integrated into `soup train`. Commands: `soup runs`, `soup runs show`, `soup runs compare`, `soup runs delete`.
51
+
52
+ **Data tools:** `commands/data.py` provides inspect, validate, convert (between alpaca/sharegpt/chatml), merge, dedup (MinHash via datasketch), and stats (extended statistics with plotext histograms).
53
+
54
+ **Eval:** `commands/eval.py` wraps lm-evaluation-harness for model evaluation on standard benchmarks (mmlu, gsm8k, etc.) with results saved to the experiment tracker.
55
+
56
+ ## Code Conventions
57
+
58
+ - **Line length:** 100 chars (ruff enforced)
59
+ - **Linter:** ruff with E, F, I, N, W rules
60
+ - **Config validation:** Always Pydantic v2 (BaseModel + Field)
61
+ - **CLI framework:** Typer with `rich_markup_mode="rich"`
62
+ - **Output:** Use `rich.console.Console` — never bare `print()`
63
+ - **Lazy imports:** Heavy deps (torch, transformers, peft, datasketch, lm_eval, plotext) are imported inside functions, not at module level
64
+ - **Variable naming:** Avoid ambiguous single-letter names such as `l`, `O`, and `I` (ruff E741) — use descriptive names like `entry`, `part`, `length` instead of `l`
65
+ - **Testing:** Rich Panel objects must be rendered via `Console(file=StringIO())` for string assertions, not `str(panel)`
66
+
67
+ ## Git Workflow
68
+
69
+ - Repo: https://github.com/MakazhanAlpamys/Soup
70
+ - Branch: `main`
71
+ - CI: GitHub Actions runs ruff lint + pytest on Python 3.9/3.11/3.12
72
+ - Always run `ruff check soup_cli/ tests/` before committing
73
+ - Always run `pytest tests/ -v` before committing
74
+
75
+ ## Tests
76
+
77
+ Test suite (~147 tests) lives in `tests/`:
78
+
79
+ | File | Covers |
80
+ |---|---|
81
+ | `test_config.py` | Config loading, validation, defaults |
82
+ | `test_data.py` | Format detection, conversion, validation |
83
+ | `test_gpu.py` | GPU detection, batch size estimation |
84
+ | `test_cli.py` | CLI commands basic validation |
85
+ | `test_tracker.py` | SQLite experiment tracker |
86
+ | `test_runs.py` | `soup runs` CLI commands |
87
+ | `test_data_tools.py` | Data convert/merge/dedup/stats commands |
88
+ | `test_eval.py` | Eval command |
89
+ | `test_smoke_train.py` | Full pipeline smoke tests (GPU) |
90
+ | `test_chat.py` | Chat command, `_detect_base_model` |
91
+ | `test_push.py` | Push command, `_format_size`, `_generate_model_card` |
92
+ | `test_init.py` | Init command, templates, overwrite logic |
93
+ | `test_callback.py` | `SoupTrainerCallback` (mock-based) |
94
+ | `test_display.py` | `TrainingDisplay` rendering |
95
+ | `test_loader.py` | Data loading (JSONL/JSON/CSV, edge cases) |
96
+ | `test_validator.py` | `validate_and_stats`, `extended_stats`, `_percentile` |
97
+ | `test_formats.py` | Reverse conversion, round-trips, edge cases |
soup_cli-0.1.0/LICENSE ADDED
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Soup Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,346 @@
1
+ Metadata-Version: 2.4
2
+ Name: soup-cli
3
+ Version: 0.1.0
4
+ Summary: Fine-tune LLMs in one command. No SSH, no config hell.
5
+ Project-URL: Homepage, https://github.com/MakazhanAlpamys/Soup
6
+ Project-URL: Repository, https://github.com/MakazhanAlpamys/Soup
7
+ Project-URL: Issues, https://github.com/MakazhanAlpamys/Soup/issues
8
+ Author: Soup Team
9
+ License-Expression: MIT
10
+ License-File: LICENSE
11
+ Keywords: fine-tuning,llm,lora,machine-learning,qlora
12
+ Classifier: Development Status :: 3 - Alpha
13
+ Classifier: Intended Audience :: Developers
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: License :: OSI Approved :: MIT License
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
18
+ Requires-Python: >=3.9
19
+ Requires-Dist: accelerate>=0.25.0
20
+ Requires-Dist: bitsandbytes>=0.41.0
21
+ Requires-Dist: datasets>=2.14.0
22
+ Requires-Dist: huggingface-hub>=0.16.0
23
+ Requires-Dist: peft>=0.7.0
24
+ Requires-Dist: plotext>=5.2.0
25
+ Requires-Dist: pydantic>=2.0.0
26
+ Requires-Dist: pyyaml>=6.0
27
+ Requires-Dist: rich>=13.0.0
28
+ Requires-Dist: torch>=2.0.0
29
+ Requires-Dist: transformers>=4.36.0
30
+ Requires-Dist: trl>=0.7.0
31
+ Requires-Dist: typer>=0.9.0
32
+ Provides-Extra: data
33
+ Requires-Dist: datasketch>=1.6.0; extra == 'data'
34
+ Provides-Extra: dev
35
+ Requires-Dist: pytest-cov>=4.0; extra == 'dev'
36
+ Requires-Dist: pytest>=7.0; extra == 'dev'
37
+ Requires-Dist: ruff>=0.1.0; extra == 'dev'
38
+ Provides-Extra: eval
39
+ Requires-Dist: lm-eval>=0.4.0; extra == 'eval'
40
+ Provides-Extra: ui
41
+ Requires-Dist: fastapi>=0.104.0; extra == 'ui'
42
+ Requires-Dist: uvicorn>=0.24.0; extra == 'ui'
43
+ Description-Content-Type: text/markdown
44
+
45
+ <p align="center">
46
+ <img src="soup.png" alt="Soup" width="200">
47
+ </p>
48
+
49
+ <h1 align="center">Soup</h1>
50
+
51
+ <p align="center">
52
+ <strong>Fine-tune LLMs in one command. No SSH, no config hell.</strong>
53
+ </p>
54
+
55
+ <p align="center">
56
+ <a href="#quick-start">Quick Start</a> &middot;
57
+ <a href="#features">Features</a> &middot;
58
+ <a href="#data-tools">Data Tools</a> &middot;
59
+ <a href="#experiment-tracking">Tracking</a> &middot;
60
+ <a href="#model-evaluation">Eval</a> &middot;
61
+ <a href="#all-commands">Commands</a>
62
+ </p>
63
+
64
+ <p align="center">
65
+ <img src="https://img.shields.io/badge/python-3.9%2B-blue" alt="Python 3.9+">
66
+ <img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License">
67
+ <img src="https://img.shields.io/badge/tests-147%20passed-brightgreen" alt="Tests">
68
+ <a href="https://github.com/MakazhanAlpamys/Soup/actions"><img src="https://github.com/MakazhanAlpamys/Soup/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
69
+ </p>
70
+
71
+ ---
72
+
73
+ Soup turns the pain of LLM fine-tuning into a simple workflow. One config, one command, done.
74
+
75
+ ```bash
76
+ pip install git+https://github.com/MakazhanAlpamys/Soup.git
77
+ soup init --template chat
78
+ soup train
79
+ ```
80
+
81
+ ## Why Soup?
82
+
83
+ Training LLMs is still painful. Even experienced teams spend 30-50% of their time fighting infrastructure instead of improving models. Soup fixes that.
84
+
85
+ - **Zero SSH.** Never SSH into a broken GPU box again.
86
+ - **One config.** A simple YAML file is all you need.
87
+ - **Auto everything.** Batch size, GPU detection, quantization — handled.
88
+ - **Works locally.** Train on your own GPU with QLoRA. No cloud required.
89
+
90
+ ## Quick Start
91
+
92
+ ### 1. Install
93
+
94
+ ```bash
95
+ # From GitHub (recommended for now):
96
+ pip install git+https://github.com/MakazhanAlpamys/Soup.git
97
+
98
+ # From PyPI (coming soon):
99
+ # pip install soup-cli
100
+ ```
101
+
102
+ ### 2. Create config
103
+
104
+ ```bash
105
+ # Interactive wizard
106
+ soup init
107
+
108
+ # Or use a template
109
+ soup init --template chat # conversational fine-tune
110
+ soup init --template code # code generation
111
+ soup init --template medical # domain expert
112
+ ```
113
+
114
+ ### 3. Train
115
+
116
+ ```bash
117
+ soup train --config soup.yaml
118
+ ```
119
+
120
+ That's it. Soup handles LoRA setup, quantization, batch size, monitoring, and checkpoints.
121
+
122
+ ### 4. Test your model
123
+
124
+ ```bash
125
+ soup chat --model ./output
126
+ ```
127
+
128
+ ### 5. Push to HuggingFace
129
+
130
+ ```bash
131
+ soup push --model ./output --repo your-username/my-model
132
+ ```
133
+
134
+ ## Config Example
135
+
136
+ ```yaml
137
+ base: meta-llama/Llama-3.1-8B-Instruct
138
+ task: sft
139
+
140
+ data:
141
+ train: ./data/train.jsonl
142
+ format: alpaca
143
+ val_split: 0.1
144
+
145
+ training:
146
+ epochs: 3
147
+ lr: 2e-5
148
+ batch_size: auto
149
+ lora:
150
+ r: 64
151
+ alpha: 16
152
+ quantization: 4bit
153
+
154
+ output: ./output
155
+ ```
156
+
157
+ ## DPO Training
158
+
159
+ Train with preference data using Direct Preference Optimization:
160
+
161
+ ```yaml
162
+ base: meta-llama/Llama-3.1-8B-Instruct
163
+ task: dpo
164
+
165
+ data:
166
+ train: ./data/preferences.jsonl
167
+ format: dpo
168
+
169
+ training:
170
+ epochs: 3
171
+ dpo_beta: 0.1
172
+ lora:
173
+ r: 64
174
+ alpha: 16
175
+ quantization: 4bit
176
+ ```
177
+
178
+ ## Chat with your model
179
+
180
+ ```bash
181
+ # Chat with a LoRA adapter (auto-detects base model)
182
+ soup chat --model ./output
183
+
184
+ # Specify base model explicitly
185
+ soup chat --model ./output --base meta-llama/Llama-3.1-8B-Instruct
186
+
187
+ # Adjust generation
188
+ soup chat --model ./output --temperature 0.3 --max-tokens 256
189
+ ```
190
+
191
+ ## Push to HuggingFace
192
+
193
+ ```bash
194
+ # Upload model to HF Hub
195
+ soup push --model ./output --repo your-username/my-model
196
+
197
+ # Make it private
198
+ soup push --model ./output --repo your-username/my-model --private
199
+ ```
200
+
201
+ ## Data Formats
202
+
203
+ Soup supports these formats (auto-detected):
204
+
205
+ **Alpaca:**
206
+ ```json
207
+ {"instruction": "Explain gravity", "input": "", "output": "Gravity is..."}
208
+ ```
209
+
210
+ **ShareGPT:**
211
+ ```json
212
+ {"conversations": [{"from": "human", "value": "Hi"}, {"from": "gpt", "value": "Hello!"}]}
213
+ ```
214
+
215
+ **ChatML:**
216
+ ```json
217
+ {"messages": [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]}
218
+ ```
219
+
220
+ **DPO (preference pairs):**
221
+ ```json
222
+ {"prompt": "Explain gravity", "chosen": "Gravity is a force...", "rejected": "I don't know"}
223
+ ```
224
+
225
+ ## Data Tools
226
+
227
+ ```bash
228
+ # Inspect a dataset
229
+ soup data inspect ./data/train.jsonl
230
+
231
+ # Validate format
232
+ soup data validate ./data/train.jsonl --format alpaca
233
+
234
+ # Convert between formats
235
+ soup data convert ./data/train.jsonl --to sharegpt --output converted.jsonl
236
+
237
+ # Merge multiple datasets
238
+ soup data merge data1.jsonl data2.jsonl --output merged.jsonl --shuffle
239
+
240
+ # Remove near-duplicates (requires: pip install 'soup-cli[data]')
241
+ soup data dedup ./data/train.jsonl --threshold 0.8
242
+
243
+ # Extended statistics (length distribution, token counts, languages)
244
+ soup data stats ./data/train.jsonl
245
+ ```
246
+
247
+ ## Experiment Tracking
248
+
249
+ Every `soup train` run is automatically tracked in a local SQLite database (`~/.soup/experiments.db`).
250
+
251
+ ```bash
252
+ # List all training runs
253
+ soup runs
254
+
255
+ # Show detailed info + loss curve for a run
256
+ soup runs show run_20260223_143052_a1b2
257
+
258
+ # Compare two runs side by side
259
+ soup runs compare run_1 run_2
260
+
261
+ # Delete a run
262
+ soup runs delete run_1
263
+ ```
264
+
265
+ ## Model Evaluation
266
+
267
+ Evaluate models on standard benchmarks using [lm-evaluation-harness](https://github.com/EleutherAI/lm-evaluation-harness):
268
+
269
+ ```bash
270
+ # Install eval dependencies
271
+ pip install 'soup-cli[eval]'
272
+
273
+ # Evaluate on benchmarks
274
+ soup eval --model ./output --benchmarks mmlu,gsm8k,hellaswag
275
+
276
+ # Link results to a training run
277
+ soup eval --model ./output --benchmarks mmlu --run-id run_20260223_143052_a1b2
278
+ ```
279
+
280
+ ## Features
281
+
282
+ | Feature | Status |
283
+ |---|---|
284
+ | LoRA / QLoRA fine-tuning | ✅ |
285
+ | SFT (Supervised Fine-Tuning) | ✅ |
286
+ | DPO (Direct Preference Optimization) | ✅ |
287
+ | Auto batch size | ✅ |
288
+ | Auto GPU detection (CUDA/MPS/CPU) | ✅ |
289
+ | Live terminal dashboard | ✅ |
290
+ | Alpaca / ShareGPT / ChatML / DPO formats | ✅ |
291
+ | HuggingFace datasets support | ✅ |
292
+ | Interactive model chat | ✅ |
293
+ | Push to HuggingFace Hub | ✅ |
294
+ | Experiment tracking (SQLite) | ✅ |
295
+ | Data tools (convert, merge, dedup, stats) | ✅ |
296
+ | Model evaluation (lm-eval) | ✅ |
297
+ | Web dashboard | 🔜 |
298
+ | Cloud mode (BYOG) | 🔜 |
299
+
300
+ ## All Commands
301
+
302
+ ```
303
+ soup init [--template chat|code|medical] Create soup.yaml config
304
+ soup train --config soup.yaml [--dry-run] Start training
305
+ soup chat --model ./output Interactive chat with model
306
+ soup push --model ./output --repo user/name Upload to HuggingFace Hub
307
+ soup data inspect <path> View dataset stats
308
+ soup data validate <path> --format alpaca Check format
309
+ soup data convert <path> --to chatml Convert between formats
310
+ soup data merge data1.jsonl data2.jsonl Combine datasets
311
+ soup data dedup <path> --threshold 0.8 Remove duplicates (MinHash)
312
+ soup data stats <path> Extended statistics
313
+ soup runs List all training runs
314
+ soup runs show <run_id> Detailed run info + loss graph
315
+ soup runs compare <run_1> <run_2> Compare two runs
316
+ soup runs delete <run_id> Remove a run
317
+ soup eval --model ./output --benchmarks mmlu Evaluate on benchmarks
318
+ soup version Show version
319
+ ```
320
+
321
+ ## Requirements
322
+
323
+ - Python 3.9+
324
+ - A CUDA GPU (recommended), Apple Silicon (MPS), or CPU (slow)
325
+ - 8 GB+ VRAM for 7B models with QLoRA
326
+
327
+ ## Development
328
+
329
+ ```bash
330
+ git clone https://github.com/MakazhanAlpamys/Soup.git
331
+ cd Soup
332
+ pip install -e ".[dev]"
333
+
334
+ # Lint
335
+ ruff check soup_cli/ tests/
336
+
337
+ # Run unit tests (fast, no GPU needed — 147 tests)
338
+ pytest tests/ -v
339
+
340
+ # Run smoke tests (downloads tiny model, runs real training)
341
+ pytest tests/ -m smoke -v
342
+ ```
343
+
344
+ ## License
345
+
346
+ MIT