vlora-dev 0.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- vlora/__init__.py +73 -0
- vlora/_validate.py +82 -0
- vlora/analysis.py +191 -0
- vlora/cli.py +430 -0
- vlora/integrations/__init__.py +1 -0
- vlora/integrations/huggingface.py +163 -0
- vlora/io.py +191 -0
- vlora/merge.py +229 -0
- vlora/model.py +148 -0
- vlora/ops.py +229 -0
- vlora/pipeline.py +70 -0
- vlora/router.py +173 -0
- vlora/subspace.py +651 -0
- vlora/training.py +149 -0
- vlora_dev-0.2.0.dist-info/METADATA +409 -0
- vlora_dev-0.2.0.dist-info/RECORD +19 -0
- vlora_dev-0.2.0.dist-info/WHEEL +4 -0
- vlora_dev-0.2.0.dist-info/entry_points.txt +2 -0
- vlora_dev-0.2.0.dist-info/licenses/LICENSE +190 -0
vlora/training.py
ADDED
|
@@ -0,0 +1,149 @@
|
|
|
1
|
+
"""Training within the shared subspace — train only loadings, not full LoRA.
|
|
2
|
+
|
|
3
|
+
Instead of optimizing rank × dim parameters per layer (standard LoRA),
|
|
4
|
+
train just k scalar loadings per layer (where k is the number of subspace
|
|
5
|
+
components). This gives 100x+ parameter reduction while staying in the
|
|
6
|
+
space of known-good adapter directions.
|
|
7
|
+
|
|
8
|
+
Usage:
|
|
9
|
+
subspace = SharedSubspace.load("shared_subspace/")
|
|
10
|
+
orthogonal_init(subspace, "new_task")  # the task must exist before training
trainer = SubspaceTrainer(subspace, "new_task")
|
|
11
|
+
|
|
12
|
+
for batch in dataloader:
|
|
13
|
+
loss = compute_loss(model, batch)
|
|
14
|
+
trainer.step(loss)
|
|
15
|
+
|
|
16
|
+
trainer.write_back()  # copy the trained loadings back into the subspace
|
|
17
|
+
subspace.save("updated_subspace/")
|
|
18
|
+
"""
|
|
19
|
+
|
|
20
|
+
from __future__ import annotations
|
|
21
|
+
|
|
22
|
+
import torch
|
|
23
|
+
import torch.nn as nn
|
|
24
|
+
from torch import Tensor
|
|
25
|
+
|
|
26
|
+
from vlora.subspace import SharedSubspace, TaskProjection
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def orthogonal_init(
    subspace: SharedSubspace,
    task_id: str,
    scale: float = 0.01,
) -> TaskProjection:
    """Register a new task whose initial adapter delta is zero.

    For every layer in ``subspace.layer_names`` the A-side loadings are drawn
    from a normal distribution with standard deviation ``scale`` and the
    B-side loadings are set to zero (the same idea as standard LoRA's
    random-A / zero-B initialization), so the initial delta is zero.

    The new loadings are created on the same device as the layer's A-side
    basis, and in the same dtype when that basis is floating point, so they
    stay compatible with a subspace that was moved via ``subspace.to(...)``.

    Args:
        subspace: The shared subspace to initialize within.
        task_id: Name for the new task. An existing entry with the same
            name in ``subspace.tasks`` is replaced.
        scale: Standard deviation of the initial A-side loadings.

    Returns:
        The TaskProjection that was stored in ``subspace.tasks[task_id]``.
    """
    loadings_a = {}
    loadings_b = {}

    for layer in subspace.layer_names:
        basis = subspace.components_a[layer]
        # Size from the layer's own basis rather than a global count, since
        # the number of components can differ between layers.
        actual_k = basis.shape[0]

        # Follow the basis' device; follow its dtype only when it is a float
        # type (a non-float basis cannot host normally-distributed loadings).
        dtype = basis.dtype if basis.is_floating_point() else torch.get_default_dtype()

        # Sample on the CPU in the default dtype first so the random stream
        # for a given seed does not depend on where the subspace lives.
        loadings_a[layer] = (torch.randn(actual_k) * scale).to(
            device=basis.device, dtype=dtype
        )
        # Initialize B-side to zero (like standard LoRA) so initial delta is zero.
        # NOTE(review): B-side is sized from the A-side basis; confirm both
        # sides always share the same number of components per layer.
        loadings_b[layer] = torch.zeros(actual_k, device=basis.device, dtype=dtype)

    proj = TaskProjection(task_id=task_id, loadings_a=loadings_a, loadings_b=loadings_b)
    subspace.tasks[task_id] = proj
    return proj
|
|
61
|
+
|
|
62
|
+
|
|
63
|
+
class SubspaceTrainer:
    """Thin optimizer wrapper for training one task's loadings in a subspace.

    Only the tensors returned by ``subspace.get_trainable_params()`` are
    handed to the optimizer. The trainer keeps those tensors in ``params``,
    exposes a single ``step(loss)`` call for the usual
    zero-grad / backward / update cycle, and copies the result back into the
    subspace's task projection on ``write_back()``.
    """

    def __init__(
        self,
        subspace: SharedSubspace,
        task_id: str,
        lr: float = 1e-3,
        num_expand: int = 0,
        optimizer_cls: type = torch.optim.Adam,
        optimizer_kwargs: dict | None = None,
    ):
        """Collect the trainable tensors for ``task_id`` and build the optimizer.

        Args:
            subspace: Shared subspace (must already contain the task).
            task_id: Task whose loadings to train.
            lr: Learning rate. Always takes precedence over an ``"lr"``
                entry in ``optimizer_kwargs``.
            num_expand: Forwarded unchanged to
                ``subspace.get_trainable_params()``.
            optimizer_cls: PyTorch optimizer class.
            optimizer_kwargs: Extra kwargs for the optimizer. The mapping is
                copied, never mutated.

        Raises:
            KeyError: If ``task_id`` is not present in ``subspace.tasks``.
        """
        if task_id not in subspace.tasks:
            raise KeyError(
                f"Task '{task_id}' not in subspace. "
                "Use orthogonal_init() or subspace.project() first."
            )

        self.subspace = subspace
        self.task_id = task_id

        # Name -> tensor mapping of everything the optimizer will update.
        self.params = subspace.get_trainable_params(task_id, num_expand=num_expand)

        # Work on a copy so the caller's dict is left untouched, then force
        # the explicit learning rate on top of it.
        optimizer_config = dict(optimizer_kwargs or {})
        optimizer_config.update(lr=lr)
        self.optimizer = optimizer_cls(list(self.params.values()), **optimizer_config)

        self._step_count = 0

    def step(self, loss: Tensor) -> float:
        """Run one optimization step for the given loss.

        Clears existing gradients, backpropagates ``loss``, applies the
        optimizer update and increments the step counter.

        Args:
            loss: Scalar loss tensor (must have grad_fn).

        Returns:
            Loss value as float.
        """
        optimizer = self.optimizer
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        self._step_count += 1
        return loss.item()

    def write_back(self) -> None:
        """Copy the trained tensors into the subspace's task projection.

        For every layer in ``subspace.layer_names`` a detached clone of the
        ``"<layer>.loadings_a"`` and ``"<layer>.loadings_b"`` entries of
        ``params`` is stored on the projection, so later training steps do
        not alter what was written.
        """
        projection = self.subspace.tasks[self.task_id]
        for layer_name in self.subspace.layer_names:
            for side in ("loadings_a", "loadings_b"):
                trained = self.params[f"{layer_name}.{side}"]
                getattr(projection, side)[layer_name] = trained.detach().clone()

    @property
    def num_trainable_params(self) -> int:
        """Total number of scalar entries across all tensors in ``params``."""
        total = 0
        for tensor in self.params.values():
            total += tensor.numel()
        return total

    @property
    def step_count(self) -> int:
        """Number of ``step()`` calls completed so far."""
        return self._step_count
|
|
@@ -0,0 +1,409 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: vlora-dev
|
|
3
|
+
Version: 0.2.0
|
|
4
|
+
Summary: Shared low-rank subspaces for efficient LoRA adapter management
|
|
5
|
+
Project-URL: Homepage, https://github.com/tveseli/vlora
|
|
6
|
+
Project-URL: Repository, https://github.com/tveseli/vlora
|
|
7
|
+
Author: Tim Veseli
|
|
8
|
+
License-Expression: Apache-2.0
|
|
9
|
+
License-File: LICENSE
|
|
10
|
+
Keywords: fine-tuning,llm,lora,parameter-efficient,subspace
|
|
11
|
+
Classifier: Development Status :: 3 - Alpha
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: Apache Software License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence
|
|
16
|
+
Requires-Python: >=3.9
|
|
17
|
+
Requires-Dist: click>=8.0
|
|
18
|
+
Requires-Dist: numpy>=1.21
|
|
19
|
+
Requires-Dist: safetensors>=0.4
|
|
20
|
+
Requires-Dist: torch>=2.0
|
|
21
|
+
Provides-Extra: dev
|
|
22
|
+
Requires-Dist: huggingface-hub>=0.20; extra == 'dev'
|
|
23
|
+
Requires-Dist: pre-commit>=3.0; extra == 'dev'
|
|
24
|
+
Requires-Dist: pytest>=7.0; extra == 'dev'
|
|
25
|
+
Requires-Dist: ruff>=0.8; extra == 'dev'
|
|
26
|
+
Provides-Extra: docs
|
|
27
|
+
Requires-Dist: mkdocs-material>=9.5; extra == 'docs'
|
|
28
|
+
Requires-Dist: mkdocstrings[python]>=0.24; extra == 'docs'
|
|
29
|
+
Provides-Extra: hf
|
|
30
|
+
Requires-Dist: huggingface-hub>=0.20; extra == 'hf'
|
|
31
|
+
Requires-Dist: transformers>=4.38; extra == 'hf'
|
|
32
|
+
Provides-Extra: hub
|
|
33
|
+
Requires-Dist: huggingface-hub>=0.20; extra == 'hub'
|
|
34
|
+
Description-Content-Type: text/markdown
|
|
35
|
+
|
|
36
|
+
<p align="center">
|
|
37
|
+
<img src="logo.png" alt="vLoRA" width="400">
|
|
38
|
+
</p>
|
|
39
|
+
|
|
40
|
+
<p align="center">
|
|
41
|
+
<strong>Shared low-rank subspaces for efficient LoRA adapter management.</strong>
|
|
42
|
+
</p>
|
|
43
|
+
|
|
44
|
+
Based on the [Share paper](https://arxiv.org/abs/2602.06043): LoRA adapters across tasks share a common low-rank subspace. Instead of storing *N* separate adapters, maintain **one shared basis** and **per-task coefficient vectors** — achieving up to 122× compression at scale.
|
|
45
|
+
|
|
46
|
+
## Install
|
|
47
|
+
|
|
48
|
+
```bash
|
|
49
|
+
pip install vlora-dev
|
|
50
|
+
```
|
|
51
|
+
|
|
52
|
+
Or from source:
|
|
53
|
+
```bash
|
|
54
|
+
git clone https://github.com/tveseli/vlora.git
|
|
55
|
+
cd vlora
|
|
56
|
+
pip install -e ".[dev]"
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Quickstart
|
|
60
|
+
|
|
61
|
+
```python
|
|
62
|
+
from vlora import SharedSubspace, load_adapter
|
|
63
|
+
|
|
64
|
+
# Step 1: Build shared subspace from existing adapters
|
|
65
|
+
adapters = [load_adapter(f"adapters/task_{i}") for i in range(5)]
|
|
66
|
+
subspace = SharedSubspace.from_adapters(adapters, num_components=16)
|
|
67
|
+
|
|
68
|
+
# Step 2: Project a new adapter (only stores small loadings vector)
|
|
69
|
+
new_adapter = load_adapter("adapters/new_task")
|
|
70
|
+
projection = subspace.project(new_adapter, task_id="new_task")
|
|
71
|
+
subspace.add_task(projection)
|
|
72
|
+
|
|
73
|
+
# Step 3: Absorb — recompute basis to include new adapter
|
|
74
|
+
subspace.absorb(load_adapter("adapters/another_task"), new_task_id="another")
|
|
75
|
+
|
|
76
|
+
# Reconstruct any task back to full LoRA weights
|
|
77
|
+
weights = subspace.reconstruct("new_task")
|
|
78
|
+
|
|
79
|
+
# Save / load
|
|
80
|
+
subspace.save("shared_subspace/")
|
|
81
|
+
subspace = SharedSubspace.load("shared_subspace/")
|
|
82
|
+
```
|
|
83
|
+
|
|
84
|
+
## CLI
|
|
85
|
+
|
|
86
|
+
vlora ships with 9 commands for common workflows:
|
|
87
|
+
|
|
88
|
+
```bash
|
|
89
|
+
# Build a shared subspace from adapter directories
|
|
90
|
+
vlora compress adapters/task_0 adapters/task_1 adapters/task_2 -o shared_subspace/
|
|
91
|
+
|
|
92
|
+
# Inspect a subspace (--json for machine-readable output)
|
|
93
|
+
vlora info shared_subspace/
|
|
94
|
+
|
|
95
|
+
# Export a task back to PEFT format (vLLM/TGI compatible)
|
|
96
|
+
vlora export shared_subspace/ task_0 -o exported_adapter/ \
|
|
97
|
+
--alpha 32 --base-model meta-llama/Llama-3-8B --target-modules q_proj,v_proj
|
|
98
|
+
|
|
99
|
+
# Add a new adapter to an existing subspace
|
|
100
|
+
vlora add shared_subspace/ adapters/new_task --task-id new_task --incremental
|
|
101
|
+
|
|
102
|
+
# Analyze adapter similarity and clustering
|
|
103
|
+
vlora analyze adapters/task_0 adapters/task_1 adapters/task_2
|
|
104
|
+
|
|
105
|
+
# Merge adapters using task arithmetic, TIES, or DARE
|
|
106
|
+
vlora merge adapters/task_0 adapters/task_1 adapters/task_2 \
|
|
107
|
+
-o merged/ --method ties --density 0.5
|
|
108
|
+
|
|
109
|
+
# Health check a subspace (NaN, orthonormality, loadings consistency)
|
|
110
|
+
vlora validate shared_subspace/
|
|
111
|
+
|
|
112
|
+
# Compare two tasks within a subspace
|
|
113
|
+
vlora diff shared_subspace/ task_0 task_1
|
|
114
|
+
|
|
115
|
+
# Benchmark subspace operations
|
|
116
|
+
vlora benchmark shared_subspace/
|
|
117
|
+
```
|
|
118
|
+
|
|
119
|
+
## Multi-Task Inference
|
|
120
|
+
|
|
121
|
+
Wrap any PyTorch model with `VLoRAModel` for on-the-fly adapter switching:
|
|
122
|
+
|
|
123
|
+
```python
|
|
124
|
+
from vlora import VLoRAModel, SharedSubspace
|
|
125
|
+
|
|
126
|
+
subspace = SharedSubspace.load("shared_subspace/")
|
|
127
|
+
model = VLoRAModel(base_model, subspace, lora_alpha=32) # or scaling=alpha/rank
|
|
128
|
+
|
|
129
|
+
# Switch adapters instantly — reconstructed from compressed loadings
|
|
130
|
+
model.set_task("task_0")
|
|
131
|
+
output = model(input_ids)
|
|
132
|
+
|
|
133
|
+
model.set_task("task_1") # cached if same task
|
|
134
|
+
output = model(input_ids)
|
|
135
|
+
|
|
136
|
+
print(model.available_tasks) # ["task_0", "task_1", ...]
|
|
137
|
+
```
|
|
138
|
+
|
|
139
|
+
## Training in the Subspace
|
|
140
|
+
|
|
141
|
+
Train only the loadings vector (k params per layer) instead of full LoRA matrices — 100×+ parameter reduction:
|
|
142
|
+
|
|
143
|
+
```python
|
|
144
|
+
from vlora import SharedSubspace, orthogonal_init, SubspaceTrainer
|
|
145
|
+
|
|
146
|
+
subspace = SharedSubspace.load("shared_subspace/")
|
|
147
|
+
orthogonal_init(subspace, "new_task") # initialize near-zero
|
|
148
|
+
|
|
149
|
+
trainer = SubspaceTrainer(subspace, "new_task", lr=1e-3)
|
|
150
|
+
print(f"Trainable params: {trainer.num_trainable_params}") # e.g. 192 vs 200K
|
|
151
|
+
|
|
152
|
+
for batch in dataloader:
|
|
153
|
+
loss = compute_loss(model, batch)
|
|
154
|
+
trainer.step(loss)
|
|
155
|
+
|
|
156
|
+
trainer.write_back() # persist learned loadings
|
|
157
|
+
subspace.save("updated_subspace/")
|
|
158
|
+
```
|
|
159
|
+
|
|
160
|
+
## Task Router
|
|
161
|
+
|
|
162
|
+
Automatically blend adapters per input using a lightweight router:
|
|
163
|
+
|
|
164
|
+
```python
|
|
165
|
+
from vlora import TaskRouter, SharedSubspace
|
|
166
|
+
|
|
167
|
+
subspace = SharedSubspace.load("shared_subspace/")
|
|
168
|
+
router = TaskRouter.from_subspace(subspace, input_dim=4096)
|
|
169
|
+
|
|
170
|
+
# Router produces soft blend weights over tasks
|
|
171
|
+
x = get_input_embedding(batch) # (B, 4096)
|
|
172
|
+
blended = router.blend_loadings(x, subspace)
|
|
173
|
+
subspace.tasks["__routed__"] = blended
|
|
174
|
+
recon = subspace.reconstruct("__routed__")
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
## Adapter Analysis
|
|
178
|
+
|
|
179
|
+
Analyze relationships between adapters before compression:
|
|
180
|
+
|
|
181
|
+
```python
|
|
182
|
+
from vlora import load_adapter, compute_similarity_matrix, find_clusters, adapter_diff
|
|
183
|
+
|
|
184
|
+
adapters = [load_adapter(f"adapters/task_{i}") for i in range(10)]
|
|
185
|
+
|
|
186
|
+
# Pairwise cosine similarity
|
|
187
|
+
sim_matrix = compute_similarity_matrix(adapters)
|
|
188
|
+
|
|
189
|
+
# Find redundant adapter groups
|
|
190
|
+
clusters = find_clusters(sim_matrix, threshold=0.9)
|
|
191
|
+
|
|
192
|
+
# Per-layer comparison of two adapters
|
|
193
|
+
diff = adapter_diff(adapters[0], adapters[1])
|
|
194
|
+
```
|
|
195
|
+
|
|
196
|
+
## Adapter Merging
|
|
197
|
+
|
|
198
|
+
Merge multiple adapters into one using state-of-the-art techniques:
|
|
199
|
+
|
|
200
|
+
```python
|
|
201
|
+
from vlora import load_adapter, task_arithmetic, ties_merge, dare_merge
|
|
202
|
+
|
|
203
|
+
adapters = [load_adapter(f"adapters/task_{i}") for i in range(3)]
|
|
204
|
+
|
|
205
|
+
# Simple weighted average
|
|
206
|
+
merged = task_arithmetic(adapters, weights=[0.5, 0.3, 0.2])
|
|
207
|
+
|
|
208
|
+
# TIES: trim small values, elect sign by majority, average (reduces interference)
|
|
209
|
+
merged = ties_merge(adapters, density=0.5)
|
|
210
|
+
|
|
211
|
+
# DARE: randomly drop & rescale before averaging (sparsification regularizer)
|
|
212
|
+
merged = dare_merge(adapters, drop_rate=0.5, seed=42)
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
## Advanced Compression
|
|
216
|
+
|
|
217
|
+
```python
|
|
218
|
+
# Adaptive k: different components per layer based on explained variance
|
|
219
|
+
subspace = SharedSubspace.from_adapters(adapters, adaptive_k=True, variance_threshold=0.9)
|
|
220
|
+
|
|
221
|
+
# Quantize components for smaller memory footprint
|
|
222
|
+
subspace.quantize(bits=8) # or bits=4
|
|
223
|
+
|
|
224
|
+
# Check compression stats
|
|
225
|
+
stats = subspace.compression_stats()
|
|
226
|
+
print(f"Compression ratio: {stats['compression_ratio']:.1f}×")
|
|
227
|
+
print(f"Compressed: {stats['total_params_compressed']:,} params")
|
|
228
|
+
print(f"Original: {stats['total_params_original']:,} params")
|
|
229
|
+
```
|
|
230
|
+
|
|
231
|
+
## Incremental Updates
|
|
232
|
+
|
|
233
|
+
Scale to thousands of adapters without loading them all at once:
|
|
234
|
+
|
|
235
|
+
```python
|
|
236
|
+
# Streaming: load adapters one at a time from disk
|
|
237
|
+
subspace = SharedSubspace.from_adapters_streaming(
|
|
238
|
+
adapter_paths, num_components=8
|
|
239
|
+
)
|
|
240
|
+
|
|
241
|
+
# Incremental absorb: fast O(1) update without full SVD recompute
|
|
242
|
+
subspace.absorb_incremental(new_adapter, "new_task")
|
|
243
|
+
|
|
244
|
+
# Move to GPU / change precision
|
|
245
|
+
subspace.to(device="cuda", dtype=torch.float16)
|
|
246
|
+
```
|
|
247
|
+
|
|
248
|
+
## The 3-Step Algorithm
|
|
249
|
+
|
|
250
|
+
| Step | Method | What happens |
|
|
251
|
+
|------|--------|-------------|
|
|
252
|
+
| **1. Initialize** | `SharedSubspace.from_adapters()` | SVD on stacked weight matrices → shared basis |
|
|
253
|
+
| **2. Project** | `subspace.project()` | New adapter → small loadings vector |
|
|
254
|
+
| **3. Absorb** | `subspace.absorb()` | Incorporate new adapter, recompute basis |
|
|
255
|
+
|
|
256
|
+
## API Reference
|
|
257
|
+
|
|
258
|
+
### Core
|
|
259
|
+
|
|
260
|
+
- **`SharedSubspace`** — Central state container. Holds per-layer basis and per-task loadings.
|
|
261
|
+
- `.from_adapters(adapters, ...)` — Build from existing adapters
|
|
262
|
+
- `.from_adapters_streaming(paths, ...)` — Build one adapter at a time from disk
|
|
263
|
+
- `.project(adapter, task_id)` → `TaskProjection`
|
|
264
|
+
- `.add_task(projection)` — Register a projected task
|
|
265
|
+
- `.reconstruct(task_id)` → `LoRAWeights`
|
|
266
|
+
- `.absorb(adapter, task_id)` — Incorporate + recompute (full SVD)
|
|
267
|
+
- `.absorb_incremental(adapter, task_id)` — Fast incremental update
|
|
268
|
+
- `.get_trainable_params(task_id)` — For training integration
|
|
269
|
+
- `.quantize(bits=8)` — Quantize components (int8/int4)
|
|
270
|
+
- `.compression_stats()` — Compression ratio and parameter counts
|
|
271
|
+
- `.to(device, dtype)` — Move tensors to device/dtype
|
|
272
|
+
- `.save(path)` / `.load(path)` — Serialization
|
|
273
|
+
|
|
274
|
+
### Model Integration
|
|
275
|
+
|
|
276
|
+
- **`VLoRAModel(base_model, subspace, lora_alpha=None)`** — Inference wrapper with forward hooks
|
|
277
|
+
- `.set_task(task_id)` — Switch adapter (cached)
|
|
278
|
+
- `.clear_task()` — Remove adapter
|
|
279
|
+
- `.available_tasks` — List task IDs
|
|
280
|
+
- `.reconstruct_state_dict(task_id)` — Get delta weight dict
|
|
281
|
+
- `.compile()` — torch.compile the base model for faster inference
|
|
282
|
+
|
|
283
|
+
### Training
|
|
284
|
+
|
|
285
|
+
- **`orthogonal_init(subspace, task_id)`** — Initialize new task with small loadings
|
|
286
|
+
- **`SubspaceTrainer(subspace, task_id)`** — Optimizer wrapper for loadings-only training
|
|
287
|
+
- `.step(loss)` — Backprop + update
|
|
288
|
+
- `.write_back()` — Persist to subspace
|
|
289
|
+
|
|
290
|
+
### Router
|
|
291
|
+
|
|
292
|
+
- **`TaskRouter(input_dim, num_tasks)`** — Lightweight adapter routing MLP
|
|
293
|
+
- `.from_subspace(subspace, input_dim)` — Auto-create from subspace
|
|
294
|
+
- `.blend_loadings(x, subspace)` — Per-input adapter blending
|
|
295
|
+
|
|
296
|
+
### Merging
|
|
297
|
+
|
|
298
|
+
- **`task_arithmetic(adapters, weights=None)`** — Weighted average merge
|
|
299
|
+
- **`ties_merge(adapters, density=0.5, weights=None)`** — Trim + elect sign + merge
|
|
300
|
+
- **`dare_merge(adapters, drop_rate=0.5, weights=None, seed=None)`** — Drop and rescale merge
|
|
301
|
+
|
|
302
|
+
### Analysis
|
|
303
|
+
|
|
304
|
+
- **`compute_similarity_matrix(adapters)`** — Pairwise cosine similarity
|
|
305
|
+
- **`find_clusters(sim_matrix, threshold)`** — Greedy clustering
|
|
306
|
+
- **`adapter_diff(a, b)`** — Per-layer L2 distance + cosine similarity
|
|
307
|
+
- **`subspace_coverage(subspace, adapter)`** — How well subspace represents an adapter
|
|
308
|
+
- **`find_outliers(adapters, threshold)`** — Detect statistical outlier adapters
|
|
309
|
+
|
|
310
|
+
### I/O
|
|
311
|
+
|
|
312
|
+
- **`load_adapter(path)`** — Load PEFT adapter from disk (safetensors)
|
|
313
|
+
- **`load_adapter_from_hub(repo_id)`** — Load from HuggingFace Hub
|
|
314
|
+
- **`save_adapter(weights, path)`** — Save back to PEFT format
|
|
315
|
+
|
|
316
|
+
### Pipeline (convenience)
|
|
317
|
+
|
|
318
|
+
- **`init_subspace(paths, ...)`** — Load + build in one call
|
|
319
|
+
- **`absorb_task(subspace, path, task_id)`** — Load + absorb
|
|
320
|
+
- **`extract_adapter(subspace, task_id, path)`** — Reconstruct + save
|
|
321
|
+
|
|
322
|
+
### Math ops
|
|
323
|
+
|
|
324
|
+
- `compute_svd`, `project_onto_subspace`, `reconstruct_from_subspace`
|
|
325
|
+
- `gram_schmidt`, `explained_variance_ratio`, `select_num_components`
|
|
326
|
+
- `incremental_svd_update`
|
|
327
|
+
|
|
328
|
+
## Benchmarks — Real-World Adapters
|
|
329
|
+
|
|
330
|
+
Tested with 8 [Lots-of-LoRAs](https://huggingface.co/Lots-of-LoRAs) adapters (Mistral-7B, rank 16, 96 layers each):
|
|
331
|
+
|
|
332
|
+
**Variance explained** — the B matrices share structure much more strongly:
|
|
333
|
+
|
|
334
|
+
| k | Variance (A) | Variance (B) |
|
|
335
|
+
|---|-------------|-------------|
|
|
336
|
+
| 1 | 0.19 | 0.43 |
|
|
337
|
+
| 2 | 0.37 | 0.73 |
|
|
338
|
+
| 4 | 0.69 | 0.95 |
|
|
339
|
+
| 6 | 1.00 | 1.00 |
|
|
340
|
+
|
|
341
|
+
**Reconstruction error** (relative L2 norm):
|
|
342
|
+
|
|
343
|
+
| k | Mean Error | Max Error |
|
|
344
|
+
|---|-----------|-----------|
|
|
345
|
+
| 1 | 0.826 | 0.938 |
|
|
346
|
+
| 4 | 0.387 | 0.846 |
|
|
347
|
+
| 6 | 0.000002 | 0.000003 |
|
|
348
|
+
|
|
349
|
+
**Compression at scale** — shared basis is a one-time cost; each new adapter adds only k loadings per layer:
|
|
350
|
+
|
|
351
|
+
| N adapters | Full (MB) | vLoRA (MB) | Ratio |
|
|
352
|
+
|-----------|----------|-----------|-------|
|
|
353
|
+
| 8 | 288 | 288 | 1.0× |
|
|
354
|
+
| 100 | 3,600 | 289 | 12.5× |
|
|
355
|
+
| 1,000 | 36,000 | 293 | 122.8× |
|
|
356
|
+
|
|
357
|
+
Run the benchmark yourself:
|
|
358
|
+
```bash
|
|
359
|
+
pip install vlora-dev[hub]
|
|
360
|
+
python examples/real_adapters.py
|
|
361
|
+
```
|
|
362
|
+
|
|
363
|
+
## HuggingFace Trainer Integration
|
|
364
|
+
|
|
365
|
+
Train in the subspace directly with HuggingFace Trainer:
|
|
366
|
+
|
|
367
|
+
```python
|
|
368
|
+
from vlora import SharedSubspace, orthogonal_init
|
|
369
|
+
from vlora.integrations.huggingface import VLoRACallback
|
|
370
|
+
|
|
371
|
+
subspace = SharedSubspace.load("shared_subspace/")
|
|
372
|
+
orthogonal_init(subspace, "new_task")
|
|
373
|
+
|
|
374
|
+
callback = VLoRACallback(subspace, "new_task", lr=1e-3)
|
|
375
|
+
trainer = Trainer(model=base_model, args=args, callbacks=[callback])
|
|
376
|
+
trainer.train()
|
|
377
|
+
subspace.save("updated_subspace/")
|
|
378
|
+
```
|
|
379
|
+
|
|
380
|
+
## Documentation
|
|
381
|
+
|
|
382
|
+
- [Quickstart notebook](examples/quickstart.ipynb) — try vlora in Google Colab
|
|
383
|
+
- [Migration from PEFT](docs/migration_from_peft.md) — integrate into existing workflow
|
|
384
|
+
- [vLLM guide](docs/guide_vllm.md) — serve with vLLM
|
|
385
|
+
- [TGI guide](docs/guide_tgi.md) — serve with TGI
|
|
386
|
+
- [Ollama guide](docs/guide_ollama.md) — local inference via GGUF
|
|
387
|
+
|
|
388
|
+
## Dependencies
|
|
389
|
+
|
|
390
|
+
- `torch >= 2.0`
|
|
391
|
+
- `safetensors >= 0.4`
|
|
392
|
+
- `click >= 8.0`
- `numpy >= 1.21`
|
|
393
|
+
- `huggingface-hub >= 0.20` *(optional, `pip install vlora-dev[hub]`)*
|
|
394
|
+
- `transformers >= 4.38` *(optional, `pip install vlora-dev[hf]`)*
|
|
395
|
+
|
|
396
|
+
## Citation
|
|
397
|
+
|
|
398
|
+
```bibtex
|
|
399
|
+
@article{share2025,
|
|
400
|
+
title={Share: Shared Low-Rank Subspaces for Efficient LoRA Adapter Management},
|
|
401
|
+
year={2025},
|
|
402
|
+
eprint={2602.06043},
|
|
403
|
+
archivePrefix={arXiv},
|
|
404
|
+
}
|
|
405
|
+
```
|
|
406
|
+
|
|
407
|
+
## License
|
|
408
|
+
|
|
409
|
+
Apache 2.0
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
vlora/__init__.py,sha256=0_fh6FMtuSAA6vkVETsJ9zuXaHL5YfM5rDaOqHu_d6Q,1869
|
|
2
|
+
vlora/_validate.py,sha256=Oe3TjMHXCmBIsNvbwDVu2XaRZucyYozBaogJq0jdd-g,2836
|
|
3
|
+
vlora/analysis.py,sha256=o73beYa0KSQGkLJiQYYm2Q7CfvnX9EmA_BSOw-ac_9o,5902
|
|
4
|
+
vlora/cli.py,sha256=doKu85cakvo7EF3y666robnkX36SEQLONGTG5ZkX5TI,16713
|
|
5
|
+
vlora/io.py,sha256=zrxu0DqYgA5latTPqhYr2waxVWPhpDezODLULK5TrtQ,6076
|
|
6
|
+
vlora/merge.py,sha256=u_rB5fEQ6OlS-Bx8Mw99Hw9iqIxWe2xTzqscQIpRvFY,7808
|
|
7
|
+
vlora/model.py,sha256=G-gniQTSfYg8grCV0gB4lz_9NBi63ANMOvZfj5ZfSPk,5084
|
|
8
|
+
vlora/ops.py,sha256=_WhjC9piB2o92NKJ3Vz3826mytVasJSSTlFJS_X4glM,7837
|
|
9
|
+
vlora/pipeline.py,sha256=zHlXS4CrYrDiDe8FrymlPxKyZJ-jdAB7LEjeYn2_c7I,1991
|
|
10
|
+
vlora/router.py,sha256=S8IqU9QQEFnjSNQUCUbZGyM8xR2dhIPq8ULtkzirp9s,5833
|
|
11
|
+
vlora/subspace.py,sha256=22ifozxo1es5xgsTziPQ6RcdPhjljoMiIDMvld5HIiU,25917
|
|
12
|
+
vlora/training.py,sha256=G17RupEZBKaPilQm7FYEpkdOd2vIh0yhroksK6-WzZg,5000
|
|
13
|
+
vlora/integrations/__init__.py,sha256=emZA-dvsVgGeF1tMYh3b_uesw0HTMJfXtehb_XctFP4,72
|
|
14
|
+
vlora/integrations/huggingface.py,sha256=oB3a28VWC8QaVowOiX6AtQoHrU3uZc69pcEYN3pV87s,5205
|
|
15
|
+
vlora_dev-0.2.0.dist-info/METADATA,sha256=z5GrMOQVGPcPYbRZoOWPE-Q8WeXIUjp84DtUS56rPvs,13367
|
|
16
|
+
vlora_dev-0.2.0.dist-info/WHEEL,sha256=WLgqFyCfm_KASv4WHyYy0P3pM_m7J5L9k2skdKLirC8,87
|
|
17
|
+
vlora_dev-0.2.0.dist-info/entry_points.txt,sha256=X5j7ZRfL3epLsn_zGGLAuL32KOL5UwVsBiR9cI7GDB0,40
|
|
18
|
+
vlora_dev-0.2.0.dist-info/licenses/LICENSE,sha256=O24RapkFxHFTFVUrn_Vbwz7pFSNsarkqy7XqzGtt54U,10765
|
|
19
|
+
vlora_dev-0.2.0.dist-info/RECORD,,
|