xlmtec 3.15.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- xlmtec-3.15.0/LICENSE +21 -0
- xlmtec-3.15.0/PKG-INFO +250 -0
- xlmtec-3.15.0/README.md +177 -0
- xlmtec-3.15.0/pyproject.toml +158 -0
- xlmtec-3.15.0/setup.cfg +4 -0
- xlmtec-3.15.0/tests/test_benchmark.py +162 -0
- xlmtec-3.15.0/tests/test_cli_train.py +180 -0
- xlmtec-3.15.0/tests/test_config.py +136 -0
- xlmtec-3.15.0/tests/test_data.py +225 -0
- xlmtec-3.15.0/tests/test_dpo_trainer.py +225 -0
- xlmtec-3.15.0/tests/test_evaluate.py +148 -0
- xlmtec-3.15.0/tests/test_evaluation.py +162 -0
- xlmtec-3.15.0/tests/test_feature_distillation_trainer.py +408 -0
- xlmtec-3.15.0/tests/test_full_trainer.py +131 -0
- xlmtec-3.15.0/tests/test_instruction_trainer.py +167 -0
- xlmtec-3.15.0/tests/test_integration.py +715 -0
- xlmtec-3.15.0/tests/test_merge.py +176 -0
- xlmtec-3.15.0/tests/test_prune.py +176 -0
- xlmtec-3.15.0/tests/test_qlora_trainer.py +233 -0
- xlmtec-3.15.0/tests/test_recommend.py +109 -0
- xlmtec-3.15.0/tests/test_response_distillation_trainer.py +367 -0
- xlmtec-3.15.0/tests/test_structured_pruner.py +254 -0
- xlmtec-3.15.0/tests/test_trainers.py +230 -0
- xlmtec-3.15.0/tests/test_tui.py +715 -0
- xlmtec-3.15.0/tests/test_upload.py +176 -0
- xlmtec-3.15.0/tests/test_wanda_cli.py +121 -0
- xlmtec-3.15.0/tests/test_wanda_pruner.py +248 -0
- xlmtec-3.15.0/xlmtec/__init__.py +0 -0
- xlmtec-3.15.0/xlmtec/cli/__init__.py +1 -0
- xlmtec-3.15.0/xlmtec/cli/commands/config.py +277 -0
- xlmtec-3.15.0/xlmtec/cli/commands/evaluate.py +332 -0
- xlmtec-3.15.0/xlmtec/cli/commands/recommend.py +240 -0
- xlmtec-3.15.0/xlmtec/cli/commands/train.py +284 -0
- xlmtec-3.15.0/xlmtec/cli/main.py +822 -0
- xlmtec-3.15.0/xlmtec/core/__init__.py +0 -0
- xlmtec-3.15.0/xlmtec/core/config.py +300 -0
- xlmtec-3.15.0/xlmtec/core/exceptions.py +147 -0
- xlmtec-3.15.0/xlmtec/core/types.py +258 -0
- xlmtec-3.15.0/xlmtec/evaluation/__init__.py +21 -0
- xlmtec-3.15.0/xlmtec/evaluation/base.py +357 -0
- xlmtec-3.15.0/xlmtec/evaluation/benchmarker.py +229 -0
- xlmtec-3.15.0/xlmtec/evaluation/evaluator.py +373 -0
- xlmtec-3.15.0/xlmtec/evaluation/metrics.py +212 -0
- xlmtec-3.15.0/xlmtec/models/__init__.py +0 -0
- xlmtec-3.15.0/xlmtec/models/loader.py +73 -0
- xlmtec-3.15.0/xlmtec/trainers/__init__.py +32 -0
- xlmtec-3.15.0/xlmtec/trainers/base.py +220 -0
- xlmtec-3.15.0/xlmtec/trainers/dpo_trainer.py +236 -0
- xlmtec-3.15.0/xlmtec/trainers/factory.py +164 -0
- xlmtec-3.15.0/xlmtec/trainers/feature_distillation_trainer.py +346 -0
- xlmtec-3.15.0/xlmtec/trainers/full_trainer.py +68 -0
- xlmtec-3.15.0/xlmtec/trainers/instruction_trainer.py +160 -0
- xlmtec-3.15.0/xlmtec/trainers/lora_trainer.py +83 -0
- xlmtec-3.15.0/xlmtec/trainers/qlora_trainer.py +56 -0
- xlmtec-3.15.0/xlmtec/trainers/response_distillation_trainer.py +245 -0
- xlmtec-3.15.0/xlmtec/trainers/structured_pruner.py +372 -0
- xlmtec-3.15.0/xlmtec/trainers/wanda_pruner.py +368 -0
- xlmtec-3.15.0/xlmtec/tui/__init__.py +1 -0
- xlmtec-3.15.0/xlmtec/tui/app.py +55 -0
- xlmtec-3.15.0/xlmtec/tui/screens/__init__.py +1 -0
- xlmtec-3.15.0/xlmtec/tui/screens/benchmark.py +209 -0
- xlmtec-3.15.0/xlmtec/tui/screens/evaluate.py +193 -0
- xlmtec-3.15.0/xlmtec/tui/screens/home.py +149 -0
- xlmtec-3.15.0/xlmtec/tui/screens/merge.py +172 -0
- xlmtec-3.15.0/xlmtec/tui/screens/recommend.py +132 -0
- xlmtec-3.15.0/xlmtec/tui/screens/result.py +124 -0
- xlmtec-3.15.0/xlmtec/tui/screens/running.py +215 -0
- xlmtec-3.15.0/xlmtec/tui/screens/train.py +206 -0
- xlmtec-3.15.0/xlmtec/tui/screens/upload.py +216 -0
- xlmtec-3.15.0/xlmtec/tui/widgets/__init__.py +1 -0
- xlmtec-3.15.0/xlmtec/tui/widgets/command_card.py +90 -0
- xlmtec-3.15.0/xlmtec/tui/widgets/log_panel.py +52 -0
- xlmtec-3.15.0/xlmtec/tui/widgets/metric_table.py +42 -0
- xlmtec-3.15.0/xlmtec/utils/__init__.py +0 -0
- xlmtec-3.15.0/xlmtec/utils/logging.py +55 -0
- xlmtec-3.15.0/xlmtec.egg-info/PKG-INFO +250 -0
- xlmtec-3.15.0/xlmtec.egg-info/SOURCES.txt +79 -0
- xlmtec-3.15.0/xlmtec.egg-info/dependency_links.txt +1 -0
- xlmtec-3.15.0/xlmtec.egg-info/entry_points.txt +2 -0
- xlmtec-3.15.0/xlmtec.egg-info/requires.txt +55 -0
- xlmtec-3.15.0/xlmtec.egg-info/top_level.txt +1 -0
xlmtec-3.15.0/LICENSE
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2025 Abdur Rahman
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
xlmtec-3.15.0/PKG-INFO
ADDED
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: xlmtec
|
|
3
|
+
Version: 3.15.0
|
|
4
|
+
Summary: Production-grade LLM fine tuning framework with CLI
|
|
5
|
+
Author: Abdur Rahman
|
|
6
|
+
License: MIT
|
|
7
|
+
Project-URL: Homepage, https://github.com/Abdur-azure/xlmtec
|
|
8
|
+
Project-URL: Documentation, https://Abdur-azure.github.io/xlmtec
|
|
9
|
+
Project-URL: Issues, https://github.com/Abdur-azure/xlmtec/issues
|
|
10
|
+
Keywords: llm,fine-tuning,lora,qlora,transformers
|
|
11
|
+
Classifier: Development Status :: 4 - Beta
|
|
12
|
+
Classifier: Intended Audience :: Developers
|
|
13
|
+
Classifier: Intended Audience :: Science/Research
|
|
14
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
15
|
+
Classifier: Programming Language :: Python :: 3
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
19
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
20
|
+
Requires-Python: >=3.10
|
|
21
|
+
Description-Content-Type: text/markdown
|
|
22
|
+
License-File: LICENSE
|
|
23
|
+
Requires-Dist: pydantic>=2.0.0
|
|
24
|
+
Requires-Dist: pyyaml>=6.0.0
|
|
25
|
+
Requires-Dist: typer>=0.9.0
|
|
26
|
+
Requires-Dist: rich>=13.0.0
|
|
27
|
+
Requires-Dist: tqdm>=4.65.0
|
|
28
|
+
Requires-Dist: rouge-score>=0.1.2
|
|
29
|
+
Requires-Dist: nltk>=3.8.0
|
|
30
|
+
Requires-Dist: pandas>=2.0.0
|
|
31
|
+
Requires-Dist: huggingface-hub>=0.19.0
|
|
32
|
+
Requires-Dist: sentencepiece>=0.1.99
|
|
33
|
+
Requires-Dist: protobuf>=3.20.0
|
|
34
|
+
Provides-Extra: ml
|
|
35
|
+
Requires-Dist: torch>=2.0.0; extra == "ml"
|
|
36
|
+
Requires-Dist: transformers>=4.35.0; extra == "ml"
|
|
37
|
+
Requires-Dist: datasets>=2.14.0; extra == "ml"
|
|
38
|
+
Requires-Dist: peft>=0.7.0; extra == "ml"
|
|
39
|
+
Requires-Dist: accelerate>=0.24.0; extra == "ml"
|
|
40
|
+
Requires-Dist: bitsandbytes>=0.41.0; extra == "ml"
|
|
41
|
+
Provides-Extra: tui
|
|
42
|
+
Requires-Dist: textual>=0.52.0; extra == "tui"
|
|
43
|
+
Provides-Extra: dpo
|
|
44
|
+
Requires-Dist: trl>=0.7.0; extra == "dpo"
|
|
45
|
+
Provides-Extra: dev
|
|
46
|
+
Requires-Dist: pytest>=7.0.0; extra == "dev"
|
|
47
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "dev"
|
|
48
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "dev"
|
|
49
|
+
Requires-Dist: pytest-timeout>=2.0.0; extra == "dev"
|
|
50
|
+
Requires-Dist: ruff>=0.1.0; extra == "dev"
|
|
51
|
+
Requires-Dist: mypy>=1.0.0; extra == "dev"
|
|
52
|
+
Provides-Extra: docs
|
|
53
|
+
Requires-Dist: mkdocs>=1.5.0; extra == "docs"
|
|
54
|
+
Requires-Dist: mkdocs-material>=9.0.0; extra == "docs"
|
|
55
|
+
Requires-Dist: mkdocs-minify-plugin>=0.7.0; extra == "docs"
|
|
56
|
+
Requires-Dist: pymdown-extensions>=10.0.0; extra == "docs"
|
|
57
|
+
Provides-Extra: full
|
|
58
|
+
Requires-Dist: torch>=2.0.0; extra == "full"
|
|
59
|
+
Requires-Dist: transformers>=4.35.0; extra == "full"
|
|
60
|
+
Requires-Dist: datasets>=2.14.0; extra == "full"
|
|
61
|
+
Requires-Dist: peft>=0.7.0; extra == "full"
|
|
62
|
+
Requires-Dist: accelerate>=0.24.0; extra == "full"
|
|
63
|
+
Requires-Dist: bitsandbytes>=0.41.0; extra == "full"
|
|
64
|
+
Requires-Dist: textual>=0.52.0; extra == "full"
|
|
65
|
+
Requires-Dist: trl>=0.7.0; extra == "full"
|
|
66
|
+
Requires-Dist: pytest>=7.0.0; extra == "full"
|
|
67
|
+
Requires-Dist: pytest-asyncio>=0.21.0; extra == "full"
|
|
68
|
+
Requires-Dist: pytest-cov>=4.0.0; extra == "full"
|
|
69
|
+
Requires-Dist: pytest-timeout>=2.0.0; extra == "full"
|
|
70
|
+
Requires-Dist: ruff>=0.1.0; extra == "full"
|
|
71
|
+
Requires-Dist: mypy>=1.0.0; extra == "full"
|
|
72
|
+
Dynamic: license-file
|
|
73
|
+
|
|
74
|
+
# xlmtec
|
|
75
|
+
|
|
76
|
+
**Production-grade LLM fine-tuning, distillation, and pruning from the command line.**
|
|
77
|
+
|
|
78
|
+
[CI](https://github.com/Abdur-azure/xlmtec/actions)
|
|
79
|
+
[Python 3.10+](https://www.python.org)
|
|
80
|
+
[License: MIT](LICENSE)
|
|
81
|
+
|
|
82
|
+
---
|
|
83
|
+
|
|
84
|
+
## What it does
|
|
85
|
+
|
|
86
|
+
`xlmtec` is a modular Python framework for fine-tuning, distilling, and pruning large language models. It wraps HuggingFace Transformers + PEFT in a clean CLI, a validated config system, a composable trainer stack, an interactive TUI, and a full test suite — all CPU-runnable for unit tests.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
## Install
|
|
91
|
+
|
|
92
|
+
```bash
|
|
93
|
+
git clone https://github.com/Abdur-azure/xlmtec.git
|
|
94
|
+
cd xlmtec
|
|
95
|
+
pip install -e .
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
## 5-minute quickstart
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# 1. Generate sample training data (no network required)
|
|
104
|
+
python examples/generate_sample_data.py
|
|
105
|
+
|
|
106
|
+
# 2. Not sure which method to use? Ask
|
|
107
|
+
xlmtec recommend gpt2 --output my_config.yaml
|
|
108
|
+
|
|
109
|
+
# 3. Train with the generated config
|
|
110
|
+
xlmtec train --config my_config.yaml
|
|
111
|
+
|
|
112
|
+
# 4. Or use a ready-made config
|
|
113
|
+
xlmtec train --config examples/configs/lora_gpt2.yaml
|
|
114
|
+
|
|
115
|
+
# 5. Launch the interactive TUI
|
|
116
|
+
xlmtec tui
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
## CLI commands
|
|
122
|
+
|
|
123
|
+
| Command | What it does |
|
|
124
|
+
|---------|-------------|
|
|
125
|
+
| `xlmtec train` | Fine-tune using a YAML config or inline flags (LoRA / QLoRA / Full / Instruction / DPO / Distillation) |
|
|
126
|
+
| `xlmtec evaluate` | Score a saved checkpoint (ROUGE, BLEU, Perplexity) |
|
|
127
|
+
| `xlmtec benchmark` | Before/after comparison: base vs fine-tuned |
|
|
128
|
+
| `xlmtec merge` | Merge LoRA adapter into base model → standalone model |
|
|
129
|
+
| `xlmtec upload` | Push adapter or merged model to HuggingFace Hub |
|
|
130
|
+
| `xlmtec recommend` | Inspect model size + VRAM, output optimal YAML config |
|
|
131
|
+
| `xlmtec prune` | Structured pruning — zero lowest-magnitude attention heads |
|
|
132
|
+
| `xlmtec wanda` | WANDA unstructured pruning — zero weights by \|W\|×activation score |
|
|
133
|
+
| `xlmtec tui` | Interactive Textual TUI — all commands via a terminal UI |
|
|
134
|
+
|
|
135
|
+
---
|
|
136
|
+
|
|
137
|
+
## Training methods
|
|
138
|
+
|
|
139
|
+
| Method | Flag | Notes |
|
|
140
|
+
|--------|------|-------|
|
|
141
|
+
| LoRA | `--method lora` | Default. Adapter-based, memory-efficient |
|
|
142
|
+
| QLoRA | `--method qlora` | 4-bit quantised LoRA — large models on limited VRAM |
|
|
143
|
+
| Full Fine-Tuning | `--method full_finetuning` | All parameters — small models only |
|
|
144
|
+
| Instruction Tuning | `--method instruction_tuning` | Alpaca-style `{instruction, input, response}` data |
|
|
145
|
+
| DPO | `--method dpo` | Direct Preference Optimization — requires `pip install trl` |
|
|
146
|
+
| Response Distillation | `--method vanilla_distillation` | Student mimics teacher logits (KL + CE loss) |
|
|
147
|
+
| Feature Distillation | `--method feature_distillation` | Student mimics teacher hidden states (MSE + KL + CE) |
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Pruning commands
|
|
152
|
+
|
|
153
|
+
```bash
|
|
154
|
+
# Structured pruning — zero lowest-magnitude attention heads
|
|
155
|
+
xlmtec prune ./outputs/gpt2_lora \
|
|
156
|
+
--output ./outputs/gpt2_pruned \
|
|
157
|
+
--sparsity 0.3 \
|
|
158
|
+
--method heads
|
|
159
|
+
|
|
160
|
+
# WANDA unstructured pruning — weight × activation scoring, zero-shot
|
|
161
|
+
xlmtec wanda ./outputs/gpt2_lora \
|
|
162
|
+
--output ./outputs/gpt2_wanda \
|
|
163
|
+
--sparsity 0.5 \
|
|
164
|
+
--dataset ./data/sample.jsonl
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Example configs
|
|
170
|
+
|
|
171
|
+
| Config | Method | Model | Data |
|
|
172
|
+
|--------|--------|-------|------|
|
|
173
|
+
| `lora_gpt2.yaml` | LoRA | GPT-2 | `data/sample.jsonl` |
|
|
174
|
+
| `qlora_llama.yaml` | QLoRA | LLaMA-3.2-1B | HF Hub (needs token) |
|
|
175
|
+
| `instruction_tuning.yaml` | Instruction | GPT-2 | `data/instructions.jsonl` |
|
|
176
|
+
| `full_finetuning.yaml` | Full | GPT-2 | `data/sample.jsonl` |
|
|
177
|
+
| `dpo.yaml` | DPO | GPT-2 | `data/dpo_sample.jsonl` |
|
|
178
|
+
| `response_distillation.yaml` | Response Distillation | GPT-2 (student) ← GPT-2-medium | `data/sample.jsonl` |
|
|
179
|
+
| `feature_distillation.yaml` | Feature Distillation | GPT-2 (student) ← GPT-2-medium | `data/sample.jsonl` |
|
|
180
|
+
| `structured_pruning.yaml` | Structured Pruning | GPT-2 | — |
|
|
181
|
+
| `wanda.yaml` | WANDA Pruning | GPT-2 | `data/sample.jsonl` (calibration) |
|
|
182
|
+
|
|
183
|
+
---
|
|
184
|
+
|
|
185
|
+
## Python API
|
|
186
|
+
|
|
187
|
+
```python
|
|
188
|
+
from xlmtec.core.config import ConfigBuilder
|
|
189
|
+
from xlmtec.core.types import TrainingMethod, DatasetSource
|
|
190
|
+
from xlmtec.models.loader import load_model_and_tokenizer
|
|
191
|
+
from xlmtec.data import prepare_dataset
|
|
192
|
+
from xlmtec.trainers import TrainerFactory
|
|
193
|
+
|
|
194
|
+
config = (
|
|
195
|
+
ConfigBuilder()
|
|
196
|
+
.with_model("gpt2")
|
|
197
|
+
.with_dataset("./data/sample.jsonl", source=DatasetSource.LOCAL_FILE)
|
|
198
|
+
.with_tokenization(max_length=256)
|
|
199
|
+
.with_training(TrainingMethod.LORA, "./output", num_epochs=3)
|
|
200
|
+
.with_lora(r=8, lora_alpha=16)
|
|
201
|
+
.build()
|
|
202
|
+
)
|
|
203
|
+
|
|
204
|
+
model, tokenizer = load_model_and_tokenizer(config.model.to_config())
|
|
205
|
+
dataset = prepare_dataset(config.dataset.to_config(), config.tokenization.to_config(), tokenizer)
|
|
206
|
+
result = TrainerFactory.train(
|
|
207
|
+
model, tokenizer, dataset,
|
|
208
|
+
config.training.to_config(),
|
|
209
|
+
config.lora.to_config(),
|
|
210
|
+
)
|
|
211
|
+
print(f"Done. Loss: {result.train_loss:.4f} → {result.output_dir}")
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
---
|
|
215
|
+
|
|
216
|
+
## Docs
|
|
217
|
+
|
|
218
|
+
- [Usage Guide](docs/usage.md) — all 9 commands with examples
|
|
219
|
+
- [Configuration Reference](docs/configuration.md) — YAML config fields for all methods
|
|
220
|
+
- [API Reference](docs/api.md) — Python API for all trainers and pruners
|
|
221
|
+
- [TUI Guide](docs/tui.md) — interactive terminal interface
|
|
222
|
+
- [Architecture](docs/ARCHITECTURE.md) — module design
|
|
223
|
+
- [Contributing](CONTRIBUTING.md) — how to add trainers or commands
|
|
224
|
+
|
|
225
|
+
---
|
|
226
|
+
|
|
227
|
+
## Tests
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
# Unit tests (no GPU needed)
|
|
231
|
+
pytest tests/ -v --ignore=tests/test_integration.py
|
|
232
|
+
|
|
233
|
+
# Integration tests (CPU ok, ~30s — downloads GPT-2 once)
|
|
234
|
+
pytest tests/test_integration.py -v -s
|
|
235
|
+
|
|
236
|
+
# Full suite
|
|
237
|
+
pytest tests/ -v
|
|
238
|
+
```
|
|
239
|
+
|
|
240
|
+
---
|
|
241
|
+
|
|
242
|
+
## Project status
|
|
243
|
+
|
|
244
|
+
| Aspect | Status |
|
|
245
|
+
|--------|--------|
|
|
246
|
+
| Version | 3.15.0 |
|
|
247
|
+
| Tests | 200+ unit + integration, all green |
|
|
248
|
+
| CI | pytest on Python 3.10 / 3.11 / 3.12 |
|
|
249
|
+
| Platform | Windows / macOS / Linux |
|
|
250
|
+
| License | MIT |
|
xlmtec-3.15.0/README.md
ADDED
|
@@ -0,0 +1,177 @@
|
|
|
1
|
+
# xlmtec
|
|
2
|
+
|
|
3
|
+
**Production-grade LLM fine-tuning, distillation, and pruning from the command line.**
|
|
4
|
+
|
|
5
|
+
[CI](https://github.com/Abdur-azure/xlmtec/actions)
|
|
6
|
+
[Python 3.10+](https://www.python.org)
|
|
7
|
+
[License: MIT](LICENSE)
|
|
8
|
+
|
|
9
|
+
---
|
|
10
|
+
|
|
11
|
+
## What it does
|
|
12
|
+
|
|
13
|
+
`xlmtec` is a modular Python framework for fine-tuning, distilling, and pruning large language models. It wraps HuggingFace Transformers + PEFT in a clean CLI, a validated config system, a composable trainer stack, an interactive TUI, and a full test suite — all CPU-runnable for unit tests.
|
|
14
|
+
|
|
15
|
+
---
|
|
16
|
+
|
|
17
|
+
## Install
|
|
18
|
+
|
|
19
|
+
```bash
|
|
20
|
+
git clone https://github.com/Abdur-azure/xlmtec.git
|
|
21
|
+
cd xlmtec
|
|
22
|
+
pip install -e .
|
|
23
|
+
```
|
|
24
|
+
|
|
25
|
+
---
|
|
26
|
+
|
|
27
|
+
## 5-minute quickstart
|
|
28
|
+
|
|
29
|
+
```bash
|
|
30
|
+
# 1. Generate sample training data (no network required)
|
|
31
|
+
python examples/generate_sample_data.py
|
|
32
|
+
|
|
33
|
+
# 2. Not sure which method to use? Ask
|
|
34
|
+
xlmtec recommend gpt2 --output my_config.yaml
|
|
35
|
+
|
|
36
|
+
# 3. Train with the generated config
|
|
37
|
+
xlmtec train --config my_config.yaml
|
|
38
|
+
|
|
39
|
+
# 4. Or use a ready-made config
|
|
40
|
+
xlmtec train --config examples/configs/lora_gpt2.yaml
|
|
41
|
+
|
|
42
|
+
# 5. Launch the interactive TUI
|
|
43
|
+
xlmtec tui
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
---
|
|
47
|
+
|
|
48
|
+
## CLI commands
|
|
49
|
+
|
|
50
|
+
| Command | What it does |
|
|
51
|
+
|---------|-------------|
|
|
52
|
+
| `xlmtec train` | Fine-tune using a YAML config or inline flags (LoRA / QLoRA / Full / Instruction / DPO / Distillation) |
|
|
53
|
+
| `xlmtec evaluate` | Score a saved checkpoint (ROUGE, BLEU, Perplexity) |
|
|
54
|
+
| `xlmtec benchmark` | Before/after comparison: base vs fine-tuned |
|
|
55
|
+
| `xlmtec merge` | Merge LoRA adapter into base model → standalone model |
|
|
56
|
+
| `xlmtec upload` | Push adapter or merged model to HuggingFace Hub |
|
|
57
|
+
| `xlmtec recommend` | Inspect model size + VRAM, output optimal YAML config |
|
|
58
|
+
| `xlmtec prune` | Structured pruning — zero lowest-magnitude attention heads |
|
|
59
|
+
| `xlmtec wanda` | WANDA unstructured pruning — zero weights by \|W\|×activation score |
|
|
60
|
+
| `xlmtec tui` | Interactive Textual TUI — all commands via a terminal UI |
|
|
61
|
+
|
|
62
|
+
---
|
|
63
|
+
|
|
64
|
+
## Training methods
|
|
65
|
+
|
|
66
|
+
| Method | Flag | Notes |
|
|
67
|
+
|--------|------|-------|
|
|
68
|
+
| LoRA | `--method lora` | Default. Adapter-based, memory-efficient |
|
|
69
|
+
| QLoRA | `--method qlora` | 4-bit quantised LoRA — large models on limited VRAM |
|
|
70
|
+
| Full Fine-Tuning | `--method full_finetuning` | All parameters — small models only |
|
|
71
|
+
| Instruction Tuning | `--method instruction_tuning` | Alpaca-style `{instruction, input, response}` data |
|
|
72
|
+
| DPO | `--method dpo` | Direct Preference Optimization — requires `pip install trl` |
|
|
73
|
+
| Response Distillation | `--method vanilla_distillation` | Student mimics teacher logits (KL + CE loss) |
|
|
74
|
+
| Feature Distillation | `--method feature_distillation` | Student mimics teacher hidden states (MSE + KL + CE) |
|
|
75
|
+
|
|
76
|
+
---
|
|
77
|
+
|
|
78
|
+
## Pruning commands
|
|
79
|
+
|
|
80
|
+
```bash
|
|
81
|
+
# Structured pruning — zero lowest-magnitude attention heads
|
|
82
|
+
xlmtec prune ./outputs/gpt2_lora \
|
|
83
|
+
--output ./outputs/gpt2_pruned \
|
|
84
|
+
--sparsity 0.3 \
|
|
85
|
+
--method heads
|
|
86
|
+
|
|
87
|
+
# WANDA unstructured pruning — weight × activation scoring, zero-shot
|
|
88
|
+
xlmtec wanda ./outputs/gpt2_lora \
|
|
89
|
+
--output ./outputs/gpt2_wanda \
|
|
90
|
+
--sparsity 0.5 \
|
|
91
|
+
--dataset ./data/sample.jsonl
|
|
92
|
+
```
|
|
93
|
+
|
|
94
|
+
---
|
|
95
|
+
|
|
96
|
+
## Example configs
|
|
97
|
+
|
|
98
|
+
| Config | Method | Model | Data |
|
|
99
|
+
|--------|--------|-------|------|
|
|
100
|
+
| `lora_gpt2.yaml` | LoRA | GPT-2 | `data/sample.jsonl` |
|
|
101
|
+
| `qlora_llama.yaml` | QLoRA | LLaMA-3.2-1B | HF Hub (needs token) |
|
|
102
|
+
| `instruction_tuning.yaml` | Instruction | GPT-2 | `data/instructions.jsonl` |
|
|
103
|
+
| `full_finetuning.yaml` | Full | GPT-2 | `data/sample.jsonl` |
|
|
104
|
+
| `dpo.yaml` | DPO | GPT-2 | `data/dpo_sample.jsonl` |
|
|
105
|
+
| `response_distillation.yaml` | Response Distillation | GPT-2 (student) ← GPT-2-medium | `data/sample.jsonl` |
|
|
106
|
+
| `feature_distillation.yaml` | Feature Distillation | GPT-2 (student) ← GPT-2-medium | `data/sample.jsonl` |
|
|
107
|
+
| `structured_pruning.yaml` | Structured Pruning | GPT-2 | — |
|
|
108
|
+
| `wanda.yaml` | WANDA Pruning | GPT-2 | `data/sample.jsonl` (calibration) |
|
|
109
|
+
|
|
110
|
+
---
|
|
111
|
+
|
|
112
|
+
## Python API
|
|
113
|
+
|
|
114
|
+
```python
|
|
115
|
+
from xlmtec.core.config import ConfigBuilder
|
|
116
|
+
from xlmtec.core.types import TrainingMethod, DatasetSource
|
|
117
|
+
from xlmtec.models.loader import load_model_and_tokenizer
|
|
118
|
+
from xlmtec.data import prepare_dataset
|
|
119
|
+
from xlmtec.trainers import TrainerFactory
|
|
120
|
+
|
|
121
|
+
config = (
|
|
122
|
+
ConfigBuilder()
|
|
123
|
+
.with_model("gpt2")
|
|
124
|
+
.with_dataset("./data/sample.jsonl", source=DatasetSource.LOCAL_FILE)
|
|
125
|
+
.with_tokenization(max_length=256)
|
|
126
|
+
.with_training(TrainingMethod.LORA, "./output", num_epochs=3)
|
|
127
|
+
.with_lora(r=8, lora_alpha=16)
|
|
128
|
+
.build()
|
|
129
|
+
)
|
|
130
|
+
|
|
131
|
+
model, tokenizer = load_model_and_tokenizer(config.model.to_config())
|
|
132
|
+
dataset = prepare_dataset(config.dataset.to_config(), config.tokenization.to_config(), tokenizer)
|
|
133
|
+
result = TrainerFactory.train(
|
|
134
|
+
model, tokenizer, dataset,
|
|
135
|
+
config.training.to_config(),
|
|
136
|
+
config.lora.to_config(),
|
|
137
|
+
)
|
|
138
|
+
print(f"Done. Loss: {result.train_loss:.4f} → {result.output_dir}")
|
|
139
|
+
```
|
|
140
|
+
|
|
141
|
+
---
|
|
142
|
+
|
|
143
|
+
## Docs
|
|
144
|
+
|
|
145
|
+
- [Usage Guide](docs/usage.md) — all 9 commands with examples
|
|
146
|
+
- [Configuration Reference](docs/configuration.md) — YAML config fields for all methods
|
|
147
|
+
- [API Reference](docs/api.md) — Python API for all trainers and pruners
|
|
148
|
+
- [TUI Guide](docs/tui.md) — interactive terminal interface
|
|
149
|
+
- [Architecture](docs/ARCHITECTURE.md) — module design
|
|
150
|
+
- [Contributing](CONTRIBUTING.md) — how to add trainers or commands
|
|
151
|
+
|
|
152
|
+
---
|
|
153
|
+
|
|
154
|
+
## Tests
|
|
155
|
+
|
|
156
|
+
```bash
|
|
157
|
+
# Unit tests (no GPU needed)
|
|
158
|
+
pytest tests/ -v --ignore=tests/test_integration.py
|
|
159
|
+
|
|
160
|
+
# Integration tests (CPU ok, ~30s — downloads GPT-2 once)
|
|
161
|
+
pytest tests/test_integration.py -v -s
|
|
162
|
+
|
|
163
|
+
# Full suite
|
|
164
|
+
pytest tests/ -v
|
|
165
|
+
```
|
|
166
|
+
|
|
167
|
+
---
|
|
168
|
+
|
|
169
|
+
## Project status
|
|
170
|
+
|
|
171
|
+
| Aspect | Status |
|
|
172
|
+
|--------|--------|
|
|
173
|
+
| Version | 3.15.0 |
|
|
174
|
+
| Tests | 200+ unit + integration, all green |
|
|
175
|
+
| CI | pytest on Python 3.10 / 3.11 / 3.12 |
|
|
176
|
+
| Platform | Windows / macOS / Linux |
|
|
177
|
+
| License | MIT |
|
xlmtec-3.15.0/pyproject.toml
ADDED
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
[build-system]
|
|
2
|
+
requires = ["setuptools>=68", "wheel"]
|
|
3
|
+
build-backend = "setuptools.build_meta"
|
|
4
|
+
|
|
5
|
+
[project]
|
|
6
|
+
name = "xlmtec"
|
|
7
|
+
version = "3.15.0"
|
|
8
|
+
description = "Production-grade LLM fine tuning framework with CLI"
|
|
9
|
+
readme = { file = "README.md", content-type = "text/markdown" }
|
|
10
|
+
requires-python = ">=3.10"
|
|
11
|
+
license = { text = "MIT" }
|
|
12
|
+
authors = [
|
|
13
|
+
{ name = "Abdur Rahman" }
|
|
14
|
+
]
|
|
15
|
+
keywords = ["llm", "fine-tuning", "lora", "qlora", "transformers"]
|
|
16
|
+
classifiers = [
|
|
17
|
+
"Development Status :: 4 - Beta",
|
|
18
|
+
"Intended Audience :: Developers",
|
|
19
|
+
"Intended Audience :: Science/Research",
|
|
20
|
+
"License :: OSI Approved :: MIT License",
|
|
21
|
+
"Programming Language :: Python :: 3",
|
|
22
|
+
"Programming Language :: Python :: 3.10",
|
|
23
|
+
"Programming Language :: Python :: 3.11",
|
|
24
|
+
"Programming Language :: Python :: 3.12",
|
|
25
|
+
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
|
26
|
+
]
|
|
27
|
+
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
# Core (lightweight) — installs in seconds, no GPU libraries
|
|
30
|
+
# ---------------------------------------------------------------------------
|
|
31
|
+
dependencies = [
|
|
32
|
+
"pydantic>=2.0.0",
|
|
33
|
+
"pyyaml>=6.0.0",
|
|
34
|
+
"typer>=0.9.0",
|
|
35
|
+
"rich>=13.0.0",
|
|
36
|
+
"tqdm>=4.65.0",
|
|
37
|
+
"rouge-score>=0.1.2",
|
|
38
|
+
"nltk>=3.8.0",
|
|
39
|
+
"pandas>=2.0.0",
|
|
40
|
+
"huggingface-hub>=0.19.0",
|
|
41
|
+
"sentencepiece>=0.1.99",
|
|
42
|
+
"protobuf>=3.20.0",
|
|
43
|
+
]
|
|
44
|
+
|
|
45
|
+
# ---------------------------------------------------------------------------
|
|
46
|
+
# Optional extras — install what you need
|
|
47
|
+
# ---------------------------------------------------------------------------
|
|
48
|
+
[project.optional-dependencies]
|
|
49
|
+
|
|
50
|
+
# Full ML stack (training / inference)
|
|
51
|
+
ml = [
|
|
52
|
+
"torch>=2.0.0",
|
|
53
|
+
"transformers>=4.35.0",
|
|
54
|
+
"datasets>=2.14.0",
|
|
55
|
+
"peft>=0.7.0",
|
|
56
|
+
"accelerate>=0.24.0",
|
|
57
|
+
"bitsandbytes>=0.41.0",
|
|
58
|
+
]
|
|
59
|
+
|
|
60
|
+
# Interactive terminal UI
|
|
61
|
+
tui = [
|
|
62
|
+
"textual>=0.52.0",
|
|
63
|
+
]
|
|
64
|
+
|
|
65
|
+
# DPO training (requires trl)
|
|
66
|
+
dpo = [
|
|
67
|
+
"trl>=0.7.0",
|
|
68
|
+
]
|
|
69
|
+
|
|
70
|
+
# Development (tests, linting, type-checking)
|
|
71
|
+
dev = [
|
|
72
|
+
"pytest>=7.0.0",
|
|
73
|
+
"pytest-asyncio>=0.21.0",
|
|
74
|
+
"pytest-cov>=4.0.0",
|
|
75
|
+
"pytest-timeout>=2.0.0",
|
|
76
|
+
"ruff>=0.1.0",
|
|
77
|
+
"mypy>=1.0.0",
|
|
78
|
+
]
|
|
79
|
+
|
|
80
|
+
# Documentation
|
|
81
|
+
docs = [
|
|
82
|
+
"mkdocs>=1.5.0",
|
|
83
|
+
"mkdocs-material>=9.0.0",
|
|
84
|
+
"mkdocs-minify-plugin>=0.7.0",
|
|
85
|
+
"pymdown-extensions>=10.0.0",
|
|
86
|
+
]
|
|
87
|
+
|
|
88
|
+
# Convenience: everything except docs (ml + tui + dpo + dev)
|
|
89
|
+
full = [
|
|
90
|
+
"torch>=2.0.0",
|
|
91
|
+
"transformers>=4.35.0",
|
|
92
|
+
"datasets>=2.14.0",
|
|
93
|
+
"peft>=0.7.0",
|
|
94
|
+
"accelerate>=0.24.0",
|
|
95
|
+
"bitsandbytes>=0.41.0",
|
|
96
|
+
"textual>=0.52.0",
|
|
97
|
+
"trl>=0.7.0",
|
|
98
|
+
"pytest>=7.0.0",
|
|
99
|
+
"pytest-asyncio>=0.21.0",
|
|
100
|
+
"pytest-cov>=4.0.0",
|
|
101
|
+
"pytest-timeout>=2.0.0",
|
|
102
|
+
"ruff>=0.1.0",
|
|
103
|
+
"mypy>=1.0.0",
|
|
104
|
+
]
|
|
105
|
+
|
|
106
|
+
# ---------------------------------------------------------------------------
|
|
107
|
+
[project.urls]
|
|
108
|
+
Homepage = "https://github.com/Abdur-azure/xlmtec"
|
|
109
|
+
Documentation = "https://Abdur-azure.github.io/xlmtec"
|
|
110
|
+
Issues = "https://github.com/Abdur-azure/xlmtec/issues"
|
|
111
|
+
|
|
112
|
+
[project.scripts]
|
|
113
|
+
xlmtec = "xlmtec.cli.main:main"
|
|
114
|
+
|
|
115
|
+
# ---------------------------------------------------------------------------
|
|
116
|
+
# Setuptools
|
|
117
|
+
# ---------------------------------------------------------------------------
|
|
118
|
+
[tool.setuptools.packages.find]
|
|
119
|
+
where = ["."]
|
|
120
|
+
include = ["xlmtec*"]
|
|
121
|
+
exclude = ["tests*", "examples*", "docs*"]
|
|
122
|
+
|
|
123
|
+
# ---------------------------------------------------------------------------
|
|
124
|
+
# Pytest
|
|
125
|
+
# ---------------------------------------------------------------------------
|
|
126
|
+
[tool.pytest.ini_options]
|
|
127
|
+
testpaths = ["tests"]
|
|
128
|
+
python_files = ["test_*.py"]
|
|
129
|
+
python_classes = ["Test*"]
|
|
130
|
+
python_functions = ["test_*"]
|
|
131
|
+
addopts = "-v --tb=short"
|
|
132
|
+
timeout = 120
|
|
133
|
+
asyncio_mode = "auto"
|
|
134
|
+
|
|
135
|
+
# ---------------------------------------------------------------------------
|
|
136
|
+
# Ruff
|
|
137
|
+
# ---------------------------------------------------------------------------
|
|
138
|
+
[tool.ruff]
|
|
139
|
+
line-length = 100
|
|
140
|
+
target-version = "py310"
|
|
141
|
+
|
|
142
|
+
[tool.ruff.lint]
|
|
143
|
+
select = ["E", "F", "W", "I"]
|
|
144
|
+
ignore = ["E501"]
|
|
145
|
+
|
|
146
|
+
[tool.ruff.lint.per-file-ignores]
|
|
147
|
+
# __init__.py files re-export names for the public API — F401 (unused import) is expected
|
|
148
|
+
"**/__init__.py" = ["F401"]
|
|
149
|
+
# Test files: F811 (redefinition of unused name from import) is fine in fixtures
|
|
150
|
+
"tests/test_*.py" = ["F811"]
|
|
151
|
+
|
|
152
|
+
# ---------------------------------------------------------------------------
|
|
153
|
+
# Mypy
|
|
154
|
+
# ---------------------------------------------------------------------------
|
|
155
|
+
[tool.mypy]
|
|
156
|
+
python_version = "3.10"
|
|
157
|
+
ignore_missing_imports = true
|
|
158
|
+
warn_return_any = false
|
xlmtec-3.15.0/setup.cfg
ADDED