PyPI - soup-cli - Versions diffs - 0.2.0__tar.gz → 0.2.2__tar.gz - Mend

soup-cli 0.2.0tar.gz → 0.2.2tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (67) hide show

{soup_cli-0.2.0 → soup_cli-0.2.2}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: soup-cli
-Version: 0.2.0
+Version: 0.2.2
 Summary: Fine-tune LLMs in one command. No SSH, no config hell.
 Project-URL: Homepage, https://github.com/MakazhanAlpamys/Soup
 Project-URL: Repository, https://github.com/MakazhanAlpamys/Soup
@@ -67,7 +67,7 @@ Description-Content-Type: text/markdown
   <a href="https://pypi.org/project/soup-cli/"><img src="https://img.shields.io/pypi/v/soup-cli?color=blue" alt="PyPI"></a>
   <img src="https://img.shields.io/badge/python-3.9%2B-blue" alt="Python 3.9+">
   <img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License">
-  <img src="https://img.shields.io/badge/tests-184%20passed-brightgreen" alt="Tests">
+  <img src="https://img.shields.io/badge/tests-186%20passed-brightgreen" alt="Tests">
   <a href="https://github.com/MakazhanAlpamys/Soup/actions"><img src="https://github.com/MakazhanAlpamys/Soup/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
 </p>
@@ -418,7 +418,7 @@ pip install -e ".[dev]"
 # Lint
 ruff check soup_cli/ tests/
-# Run unit tests (fast, no GPU needed — 184 tests)
+# Run unit tests (fast, no GPU needed — 186 tests)
 pytest tests/ -v
 # Run smoke tests (downloads tiny model, runs real training)

{soup_cli-0.2.0 → soup_cli-0.2.2}/README.md RENAMED Viewed

@@ -21,7 +21,7 @@
   <a href="https://pypi.org/project/soup-cli/"><img src="https://img.shields.io/pypi/v/soup-cli?color=blue" alt="PyPI"></a>
   <img src="https://img.shields.io/badge/python-3.9%2B-blue" alt="Python 3.9+">
   <img src="https://img.shields.io/badge/license-MIT-green" alt="MIT License">
-  <img src="https://img.shields.io/badge/tests-184%20passed-brightgreen" alt="Tests">
+  <img src="https://img.shields.io/badge/tests-186%20passed-brightgreen" alt="Tests">
   <a href="https://github.com/MakazhanAlpamys/Soup/actions"><img src="https://github.com/MakazhanAlpamys/Soup/actions/workflows/ci.yml/badge.svg" alt="CI"></a>
 </p>
@@ -372,7 +372,7 @@ pip install -e ".[dev]"
 # Lint
 ruff check soup_cli/ tests/
-# Run unit tests (fast, no GPU needed — 184 tests)
+# Run unit tests (fast, no GPU needed — 186 tests)
 pytest tests/ -v
 # Run smoke tests (downloads tiny model, runs real training)

soup_cli-0.2.2/TESTING_GUIDE.md ADDED Viewed

@@ -0,0 +1,196 @@
+# Soup CLI — Quick Local Test Guide (Windows)
+**Hardware:** RTX 3050 (4GB VRAM), i5
+**Model:** `TinyLlama/TinyLlama-1.1B-Chat-v1.0` (~1.1B params, ~600MB in 4-bit)
+**OS:** Windows (CMD/PowerShell)
+---
+## 0. Install
+```cmd
+pip install soup-cli
+pip install datasketch
+```
+> Unit tests (`pytest tests/`) only work from the repo clone (`pip install -e ".[dev]"`), not from pip install.
+## 1. Version & Help
+```cmd
+soup version
+soup --help
+soup train --help
+soup data --help
+```
+## 2. Init Config from Templates
+```cmd
+soup init -t chat -o test_chat.yaml
+soup init -t code -o test_code.yaml
+```
+> `soup init` without `-t` opens an interactive wizard (requires terminal input).
+## 3. Create Test Dataset
+Create file `test_data.jsonl` with this content (copy-paste into any text editor, save as `test_data.jsonl`):
+```jsonl
+{"instruction": "What is Python?", "input": "", "output": "Python is a high-level programming language known for its simplicity."}
+{"instruction": "Explain recursion", "input": "", "output": "Recursion is when a function calls itself to solve smaller subproblems."}
+{"instruction": "What is a list?", "input": "", "output": "A list is an ordered, mutable collection of elements in Python."}
+{"instruction": "What is a dictionary?", "input": "", "output": "A dictionary is a key-value data structure in Python."}
+{"instruction": "What is OOP?", "input": "", "output": "OOP is a programming paradigm based on objects and classes."}
+{"instruction": "What is an API?", "input": "", "output": "An API is an interface that allows software systems to communicate."}
+{"instruction": "What is Git?", "input": "", "output": "Git is a distributed version control system for tracking code changes."}
+{"instruction": "What is Docker?", "input": "", "output": "Docker is a platform for containerizing applications."}
+{"instruction": "What is SQL?", "input": "", "output": "SQL is a language for managing and querying relational databases."}
+{"instruction": "What is REST?", "input": "", "output": "REST is an architectural style for designing networked APIs using HTTP methods."}
+```
+Or create it with a Python one-liner:
+```cmd
+python -c "import json; data=[{'instruction':q,'input':'','output':a} for q,a in [('What is Python?','A high-level programming language.'),('Explain recursion','A function calling itself.'),('What is a list?','An ordered mutable collection.'),('What is OOP?','Programming with objects and classes.'),('What is Git?','A version control system.'),('What is Docker?','A containerization platform.'),('What is SQL?','A database query language.'),('What is REST?','An API architectural style.'),('What is an API?','An interface for software communication.'),('What is CSS?','A stylesheet language for web pages.')]]; f=open('test_data.jsonl','w'); [f.write(json.dumps(d)+'\n') for d in data]; f.close(); print('Created test_data.jsonl')"
+```
+## 4. Data Tools
+```cmd
+soup data inspect test_data.jsonl
+soup data validate test_data.jsonl --format alpaca
+soup data stats test_data.jsonl
+soup data convert test_data.jsonl --to sharegpt -o test_sharegpt.jsonl
+soup data convert test_data.jsonl --to chatml -o test_chatml.jsonl
+soup data inspect test_sharegpt.jsonl
+soup data inspect test_chatml.jsonl
+soup data merge test_data.jsonl test_sharegpt.jsonl -o test_merged.jsonl --shuffle
+soup data dedup test_merged.jsonl -o test_deduped.jsonl --threshold 0.8
+```
+## 5. Create Config for Training
+Create file `test_soup.yaml` (copy-paste into text editor):
+```yaml
+base: TinyLlama/TinyLlama-1.1B-Chat-v1.0
+data:
+  train: test_data.jsonl
+  format: alpaca
+  max_length: 256
+training:
+  epochs: 2
+  lr: 2e-4
+  batch_size: 2
+  quantization: 4bit
+  logging_steps: 1
+  save_steps: 50
+  lora:
+    r: 8
+    alpha: 16
+    dropout: 0.05
+output: ./test_output
+```
+## 6. Dry Run (validate without training)
+```cmd
+soup train -c test_soup.yaml --dry-run
+```
+## 7. Train
+```cmd
+soup train -c test_soup.yaml --name "local-test"
+```
+Training should take ~1-3 minutes on an RTX 3050 with this tiny dataset.
+## 8. Experiment Tracking
+```cmd
+soup runs
+```
+Copy the Run ID from the output, then:
+```cmd
+soup runs show RUN_ID_HERE
+```
+Example: `soup runs show run_20260304_004948_983f284d`
+## 9. Chat with Fine-Tuned Model
+```cmd
+soup chat -m ./test_output
+```
+Type questions, then type `exit` to quit.
+## 10. Merge LoRA
+```cmd
+soup merge -a ./test_output -o ./test_merged_model
+```
+## 11. Export to GGUF (optional, needs llama.cpp + cmake)
+```cmd
+soup export -m ./test_merged_model -q q4_k_m -o test_model.gguf
+```
+## 12. Eval (optional, slow)
+```cmd
+pip install lm-eval
+soup eval -m ./test_output --benchmarks hellaswag --batch-size 4
+```
+---
+## Cleanup (Windows)
+```cmd
+rmdir /s /q test_output test_merged_model
+del test_data.jsonl test_sharegpt.jsonl test_chatml.jsonl test_merged.jsonl test_deduped.jsonl
+del test_soup.yaml test_chat.yaml test_code.yaml test_model.gguf
+```
+Or in PowerShell:
+```powershell
+Remove-Item -Recurse -Force test_output, test_merged_model -ErrorAction SilentlyContinue
+Remove-Item test_data.jsonl, test_sharegpt.jsonl, test_chatml.jsonl, test_merged.jsonl, test_deduped.jsonl, test_soup.yaml, test_chat.yaml, test_code.yaml, test_model.gguf -ErrorAction SilentlyContinue
+```
+## Expected Results
+| Step | Expected |
+|------|----------|
+| Version | `soup v0.2.2` |
+| Init templates | Creates yaml files |
+| Data inspect | Table with stats + sample rows |
+| Data validate | "10/10 rows valid" |
+| Data stats | Length distribution + histogram |
+| Data convert | Creates sharegpt/chatml jsonl files |
+| Data merge | Merges into single file |
+| Data dedup | Removes near-duplicates |
+| Dry run | "Config valid" or similar |
+| Train | Loss decreasing, ~1-3 min |
+| Runs | Shows run with metrics |
+| Chat | Model responds (quality low with 10 samples — that's OK) |
+| Merge | Creates full model in test_merged_model/ |

{soup_cli-0.2.0 → soup_cli-0.2.2}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "hatchling.build"
 [project]
 name = "soup-cli"
-version = "0.2.0"
+version = "0.2.2"
 description = "Fine-tune LLMs in one command. No SSH, no config hell."
 readme = "README.md"
 license = "MIT"

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/__init__.py RENAMED Viewed

@@ -1,3 +1,3 @@
 """Soup CLI — Fine-tune LLMs in one command."""
-__version__ = "0.2.0"
+__version__ = "0.2.2"

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/commands/chat.py RENAMED Viewed

@@ -179,7 +179,7 @@ def _load_model(
             base_model,
             trust_remote_code=True,
             device_map="auto",
-            torch_dtype=torch.float16,
+            dtype=torch.float16,
         )
         console.print(f"[dim]Loading LoRA adapter: {model_path}...[/]")
         model_obj = PeftModel.from_pretrained(base, model_path)
@@ -189,7 +189,7 @@ def _load_model(
             model_path,
             trust_remote_code=True,
             device_map="auto",
-            torch_dtype=torch.float16,
+            dtype=torch.float16,
         )
     model_obj.eval()

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/commands/export.py RENAMED Viewed

@@ -194,7 +194,7 @@ def _merge_adapter(adapter_path: str, base_model: str, output_dir: str):
     console.print(f"[dim]Loading base model: {base_model}...[/]")
     model = AutoModelForCausalLM.from_pretrained(
         base_model,
-        torch_dtype=torch.float16,
+        dtype=torch.float16,
         trust_remote_code=True,
         device_map="cpu",
     )

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/commands/merge.py RENAMED Viewed

@@ -91,12 +91,12 @@ def merge(
             "bfloat16": torch.bfloat16,
             "float32": torch.float32,
         }
-        torch_dtype = dtype_map[dtype]
+        model_dtype = dtype_map[dtype]
         console.print(f"[dim]Loading base model: {base}...[/]")
         model = AutoModelForCausalLM.from_pretrained(
             base,
-            torch_dtype=torch_dtype,
+            dtype=model_dtype,
             trust_remote_code=True,
             device_map="cpu",
         )

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/monitoring/callback.py RENAMED Viewed

@@ -47,7 +47,7 @@ class SoupTrainerCallback(TrainerCallback):
             if torch.cuda.is_available():
                 used = torch.cuda.memory_allocated() / (1024**3)
-                total = torch.cuda.get_device_properties(0).total_mem / (1024**3)
+                total = torch.cuda.get_device_properties(0).total_memory / (1024**3)
                 gpu_mem = f"{used:.1f}/{total:.1f} GB"
         except Exception:
             pass

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/trainer/dpo.py RENAMED Viewed

@@ -127,6 +127,15 @@ class DPOTrainerWrapper:
             output_dir = output_dir / cfg.experiment_name
         output_dir.mkdir(parents=True, exist_ok=True)
+        # --- Calculate warmup steps from ratio ---
+        import math
+        total_steps = (
+            math.ceil(len(train_ds) / batch_size / tcfg.gradient_accumulation_steps)
+            * tcfg.epochs
+        )
+        warmup_steps = int(total_steps * tcfg.warmup_ratio)
         # --- DPO config ---
         dpo_config = DPOConfig(
             output_dir=str(output_dir),
@@ -134,7 +143,7 @@ class DPOTrainerWrapper:
             per_device_train_batch_size=batch_size,
             gradient_accumulation_steps=tcfg.gradient_accumulation_steps,
             learning_rate=tcfg.lr,
-            warmup_ratio=tcfg.warmup_ratio,
+            warmup_steps=warmup_steps,
             weight_decay=tcfg.weight_decay,
             max_grad_norm=tcfg.max_grad_norm,
             optim=tcfg.optimizer,

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/trainer/sft.py RENAMED Viewed

@@ -137,6 +137,15 @@ class SFTTrainerWrapper:
             output_dir = output_dir / cfg.experiment_name
         output_dir.mkdir(parents=True, exist_ok=True)
+        # --- Calculate warmup steps from ratio ---
+        import math
+        total_steps = (
+            math.ceil(len(train_ds) / batch_size / tcfg.gradient_accumulation_steps)
+            * tcfg.epochs
+        )
+        warmup_steps = int(total_steps * tcfg.warmup_ratio)
         # --- Training args ---
         training_args = TrainingArguments(
             output_dir=str(output_dir),
@@ -144,7 +153,7 @@ class SFTTrainerWrapper:
             per_device_train_batch_size=batch_size,
             gradient_accumulation_steps=tcfg.gradient_accumulation_steps,
             learning_rate=tcfg.lr,
-            warmup_ratio=tcfg.warmup_ratio,
+            warmup_steps=warmup_steps,
             weight_decay=tcfg.weight_decay,
             max_grad_norm=tcfg.max_grad_norm,
             optim=tcfg.optimizer,

{soup_cli-0.2.0 → soup_cli-0.2.2}/soup_cli/utils/gpu.py RENAMED Viewed

@@ -25,7 +25,7 @@ def get_gpu_info() -> dict:
         import torch
         if torch.cuda.is_available():
-            total = torch.cuda.get_device_properties(0).total_mem
+            total = torch.cuda.get_device_properties(0).total_memory
             total_gb = total / (1024**3)
             return {
                 "memory_total": f"{total_gb:.1f} GB",

{soup_cli-0.2.0 → soup_cli-0.2.2}/tests/test_cli.py RENAMED Viewed

@@ -2,6 +2,7 @@
 from typer.testing import CliRunner
+from soup_cli import __version__
 from soup_cli.cli import app
 runner = CliRunner()
@@ -10,7 +11,7 @@ runner = CliRunner()
 def test_version():
     result = runner.invoke(app, ["version"])
     assert result.exit_code == 0
-    assert "0.2.0" in result.output
+    assert __version__ in result.output
 def test_help():

{soup_cli-0.2.0 → soup_cli-0.2.2}/tests/test_resume.py RENAMED Viewed

@@ -83,13 +83,13 @@ def test_resolve_checkpoint_auto_ignores_non_checkpoint_dirs(tmp_path: Path):
 def test_train_resume_flag_in_help():
     result = runner.invoke(app, ["train", "--help"])
     assert result.exit_code == 0
-    assert "--resume" in result.output
+    assert "resume" in result.output.lower()
 def test_train_wandb_flag_in_help():
     result = runner.invoke(app, ["train", "--help"])
     assert result.exit_code == 0
-    assert "--wandb" in result.output
+    assert "wandb" in result.output.lower()
 def test_train_resume_nonexistent_checkpoint(tmp_path: Path):