uni-layer 0.2.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (69) hide show
  1. uni_layer-0.2.0/LICENSE +21 -0
  2. uni_layer-0.2.0/PKG-INFO +459 -0
  3. uni_layer-0.2.0/README.md +403 -0
  4. uni_layer-0.2.0/pyproject.toml +112 -0
  5. uni_layer-0.2.0/setup.cfg +4 -0
  6. uni_layer-0.2.0/setup.py +9 -0
  7. uni_layer-0.2.0/tests/test_all_metrics.py +439 -0
  8. uni_layer-0.2.0/tests/test_analyzer.py +173 -0
  9. uni_layer-0.2.0/tests/test_benchmark.py +407 -0
  10. uni_layer-0.2.0/tests/test_cache.py +96 -0
  11. uni_layer-0.2.0/tests/test_fast_math.py +151 -0
  12. uni_layer-0.2.0/tests/test_hf_adapter.py +342 -0
  13. uni_layer-0.2.0/tests/test_integrations.py +131 -0
  14. uni_layer-0.2.0/tests/test_metrics.py +190 -0
  15. uni_layer-0.2.0/uni_layer/__init__.py +36 -0
  16. uni_layer-0.2.0/uni_layer/benchmark/__init__.py +5 -0
  17. uni_layer-0.2.0/uni_layer/benchmark/runner.py +343 -0
  18. uni_layer-0.2.0/uni_layer/cli.py +157 -0
  19. uni_layer-0.2.0/uni_layer/compression/__init__.py +14 -0
  20. uni_layer-0.2.0/uni_layer/compression/pruner.py +334 -0
  21. uni_layer-0.2.0/uni_layer/core/__init__.py +6 -0
  22. uni_layer-0.2.0/uni_layer/core/analyzer.py +470 -0
  23. uni_layer-0.2.0/uni_layer/core/base_metric.py +185 -0
  24. uni_layer-0.2.0/uni_layer/core/base_metric_cn.py +435 -0
  25. uni_layer-0.2.0/uni_layer/core/cache.py +204 -0
  26. uni_layer-0.2.0/uni_layer/core/schema.py +177 -0
  27. uni_layer-0.2.0/uni_layer/experimental/__init__.py +11 -0
  28. uni_layer-0.2.0/uni_layer/experimental/distiller.py +477 -0
  29. uni_layer-0.2.0/uni_layer/experimental/peft.py +486 -0
  30. uni_layer-0.2.0/uni_layer/integrations/__init__.py +23 -0
  31. uni_layer-0.2.0/uni_layer/integrations/distillation.py +189 -0
  32. uni_layer-0.2.0/uni_layer/integrations/huggingface_peft.py +251 -0
  33. uni_layer-0.2.0/uni_layer/integrations/torch_pruning.py +212 -0
  34. uni_layer-0.2.0/uni_layer/metrics/__init__.py +51 -0
  35. uni_layer-0.2.0/uni_layer/metrics/architecture_specific/__init__.py +5 -0
  36. uni_layer-0.2.0/uni_layer/metrics/architecture_specific/attention_flow.py +194 -0
  37. uni_layer-0.2.0/uni_layer/metrics/bayesian/__init__.py +5 -0
  38. uni_layer-0.2.0/uni_layer/metrics/bayesian/laplace_posterior.py +241 -0
  39. uni_layer-0.2.0/uni_layer/metrics/information_theory/__init__.py +6 -0
  40. uni_layer-0.2.0/uni_layer/metrics/information_theory/entropy.py +111 -0
  41. uni_layer-0.2.0/uni_layer/metrics/information_theory/mutual_information.py +129 -0
  42. uni_layer-0.2.0/uni_layer/metrics/optimization/__init__.py +7 -0
  43. uni_layer-0.2.0/uni_layer/metrics/optimization/fisher_information.py +126 -0
  44. uni_layer-0.2.0/uni_layer/metrics/optimization/gradient_norm.py +134 -0
  45. uni_layer-0.2.0/uni_layer/metrics/optimization/hessian_trace.py +158 -0
  46. uni_layer-0.2.0/uni_layer/metrics/representation/__init__.py +6 -0
  47. uni_layer-0.2.0/uni_layer/metrics/representation/block_influence.py +119 -0
  48. uni_layer-0.2.0/uni_layer/metrics/representation/jacobian_rank.py +156 -0
  49. uni_layer-0.2.0/uni_layer/metrics/robustness/__init__.py +5 -0
  50. uni_layer-0.2.0/uni_layer/metrics/robustness/droplayer.py +221 -0
  51. uni_layer-0.2.0/uni_layer/metrics/spectral/__init__.py +7 -0
  52. uni_layer-0.2.0/uni_layer/metrics/spectral/cka.py +198 -0
  53. uni_layer-0.2.0/uni_layer/metrics/spectral/effective_rank.py +163 -0
  54. uni_layer-0.2.0/uni_layer/metrics/spectral/ntk.py +131 -0
  55. uni_layer-0.2.0/uni_layer/utils/__init__.py +20 -0
  56. uni_layer-0.2.0/uni_layer/utils/fast_math.py +242 -0
  57. uni_layer-0.2.0/uni_layer/utils/hook_utils.py +79 -0
  58. uni_layer-0.2.0/uni_layer/utils/layer_utils.py +254 -0
  59. uni_layer-0.2.0/uni_layer/utils/model_adapter.py +133 -0
  60. uni_layer-0.2.0/uni_layer/utils/report.py +348 -0
  61. uni_layer-0.2.0/uni_layer/visualization/__init__.py +27 -0
  62. uni_layer-0.2.0/uni_layer/visualization/interactive.py +285 -0
  63. uni_layer-0.2.0/uni_layer/visualization/plot_utils.py +238 -0
  64. uni_layer-0.2.0/uni_layer.egg-info/PKG-INFO +459 -0
  65. uni_layer-0.2.0/uni_layer.egg-info/SOURCES.txt +67 -0
  66. uni_layer-0.2.0/uni_layer.egg-info/dependency_links.txt +1 -0
  67. uni_layer-0.2.0/uni_layer.egg-info/not-zip-safe +1 -0
  68. uni_layer-0.2.0/uni_layer.egg-info/requires.txt +38 -0
  69. uni_layer-0.2.0/uni_layer.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Uni-Layer Team
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,459 @@
1
+ Metadata-Version: 2.4
2
+ Name: uni-layer
3
+ Version: 0.2.0
4
+ Summary: A Universal Framework for Layer Contribution Analysis
5
+ Author-email: Uni-Layer Team <contact@uni-layer.org>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/GeoffreyWang1117/Uni-Layer
8
+ Project-URL: Repository, https://github.com/GeoffreyWang1117/Uni-Layer
9
+ Project-URL: Issues, https://github.com/GeoffreyWang1117/Uni-Layer/issues
10
+ Keywords: deep learning,layer analysis,interpretability,model compression
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.8
16
+ Classifier: Programming Language :: Python :: 3.9
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Requires-Python: >=3.8
20
+ Description-Content-Type: text/markdown
21
+ License-File: LICENSE
22
+ Requires-Dist: torch>=1.12.0
23
+ Requires-Dist: numpy>=1.21.0
24
+ Requires-Dist: scipy>=1.7.0
25
+ Requires-Dist: scikit-learn>=1.0.0
26
+ Requires-Dist: matplotlib>=3.5.0
27
+ Requires-Dist: seaborn>=0.11.0
28
+ Requires-Dist: pandas>=1.3.0
29
+ Requires-Dist: tqdm>=4.62.0
30
+ Provides-Extra: dev
31
+ Requires-Dist: pytest>=7.0.0; extra == "dev"
32
+ Requires-Dist: pytest-cov>=3.0.0; extra == "dev"
33
+ Requires-Dist: black>=22.0.0; extra == "dev"
34
+ Requires-Dist: flake8>=4.0.0; extra == "dev"
35
+ Requires-Dist: mypy>=0.950; extra == "dev"
36
+ Requires-Dist: isort>=5.10.0; extra == "dev"
37
+ Provides-Extra: docs
38
+ Requires-Dist: sphinx>=4.5.0; extra == "docs"
39
+ Requires-Dist: sphinx-rtd-theme>=1.0.0; extra == "docs"
40
+ Requires-Dist: sphinx-autodoc-typehints>=1.18.0; extra == "docs"
41
+ Provides-Extra: viz
42
+ Requires-Dist: networkx>=2.6.0; extra == "viz"
43
+ Requires-Dist: plotly>=5.0.0; extra == "viz"
44
+ Provides-Extra: integrations
45
+ Requires-Dist: torch-pruning>=1.2.0; extra == "integrations"
46
+ Requires-Dist: peft>=0.6.0; extra == "integrations"
47
+ Requires-Dist: transformers>=4.20.0; extra == "integrations"
48
+ Provides-Extra: all
49
+ Requires-Dist: transformers>=4.20.0; extra == "all"
50
+ Requires-Dist: timm>=0.6.0; extra == "all"
51
+ Requires-Dist: networkx>=2.6.0; extra == "all"
52
+ Requires-Dist: plotly>=5.0.0; extra == "all"
53
+ Requires-Dist: torch-pruning>=1.2.0; extra == "all"
54
+ Requires-Dist: peft>=0.6.0; extra == "all"
55
+ Dynamic: license-file
56
+
57
+ # Uni-Layer
58
+
59
+ **Understand your layers before you optimize them.**
60
+
61
+ [![PyPI](https://img.shields.io/pypi/v/uni-layer.svg)](https://pypi.org/project/uni-layer/)
62
+ [![Python 3.8+](https://img.shields.io/badge/python-3.8+-blue.svg)](https://www.python.org/downloads/)
63
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
64
+ [![Tests](https://img.shields.io/badge/tests-168%20passed-brightgreen.svg)]()
65
+
66
+ Uni-Layer is a PyTorch toolkit that scores every layer in your neural network across **13 metrics in 7 theoretical categories**. It tells you which layers matter most — so you can prune smarter, fine-tune better, and distill more effectively.
67
+
68
+ **[English](#quick-start)** | **[中文](#中文说明)**
69
+
70
+ ---
71
+
72
+ ## Why Uni-Layer?
73
+
74
+ Most compression and fine-tuning tools treat all layers equally or rely on simple magnitude heuristics. Uni-Layer replaces guesswork with principled, multi-metric layer analysis.
75
+
76
+ There is no other library that does this. Captum does input attribution. Torch-Pruning does structural pruning. TransformerLens does mechanistic interpretability. **Uni-Layer is the only tool that unifies 13 layer importance metrics under one API and bridges them to downstream tools.**
77
+
78
+ | You want to... | Uni-Layer provides | Works with |
79
+ |---|---|---|
80
+ | **Prune** a model | Per-layer importance scores & pruning ratios | [Torch-Pruning](https://github.com/VainF/Torch-Pruning) |
81
+ | **LoRA fine-tune** | Which layers to target, adaptive rank allocation | [HuggingFace PEFT](https://github.com/huggingface/peft) |
82
+ | **Distill** knowledge | Layer pairing & per-layer distillation weights | Any distillation framework |
83
+ | **Understand** a model | Multi-metric layer contribution profile | Standalone |
84
+
85
+ ---
86
+
87
+ ## Quick Start
88
+
89
+ ```bash
90
+ pip install uni-layer
91
+ ```
92
+
93
+ ```python
94
+ from uni_layer import LayerAnalyzer
95
+ from uni_layer.metrics import GradientNorm, CKA, BlockInfluence
96
+
97
+ analyzer = LayerAnalyzer(model, task_type='classification')
98
+ contributions = analyzer.compute_metrics(
99
+ metrics=[GradientNorm(), CKA(), BlockInfluence()],
100
+ data_loader=train_loader,
101
+ )
102
+
103
+ # Rank layers by importance
104
+ for name, score in analyzer.rank_layers(contributions, 'gradient_norm'):
105
+ print(f" {name}: {score:.4f}")
106
+ ```
107
+
108
+ ---
109
+
110
+ ## Output Format
111
+
112
+ Every call to `compute_metrics()` returns a structured dict keyed by module name (here `"0"`, `"2"`, `"4"`, `"6"` — only parameterized layers appear, so keys may be non-consecutive), with `layer_idx` giving each analyzed layer's sequential position. Here is a real example from a 4-layer MLP:
113
+
114
+ ```json
115
+ {
116
+ "0": {
117
+ "layer_idx": 0,
118
+ "layer_type": "linear",
119
+ "gradient_norm": 0.0193,
120
+ "gradient_norm_std": 0.0016,
121
+ "cka_score": 0.4161,
122
+ "effective_rank": 10.54,
123
+ "block_influence": 1.0,
124
+ "fisher_information": 0.0001
125
+ },
126
+ "2": {
127
+ "layer_idx": 1,
128
+ "layer_type": "linear",
129
+ "gradient_norm": 0.0494,
130
+ "cka_score": 0.5449,
131
+ "effective_rank": 20.18,
132
+ "block_influence": 1.0,
133
+ "fisher_information": 0.0002
134
+ },
135
+ "4": {
136
+ "layer_idx": 2,
137
+ "layer_type": "linear",
138
+ "gradient_norm": 0.0624,
139
+ "cka_score": 0.6233,
140
+ "effective_rank": 9.58,
141
+ "block_influence": 1.0,
142
+ "fisher_information": 0.0003
143
+ },
144
+ "6": {
145
+ "layer_idx": 3,
146
+ "layer_type": "linear",
147
+ "gradient_norm": 0.1094,
148
+ "cka_score": 1.0,
149
+ "effective_rank": 2.36,
150
+ "block_influence": 1.0,
151
+ "fisher_information": 0.0009
152
+ }
153
+ }
154
+ ```
155
+
156
+ `rank_layers()` returns sorted `(name, score)` tuples:
157
+
158
+ ```python
159
+ [("6", 0.1094), ("4", 0.0624), ("2", 0.0494), ("0", 0.0193)]
160
+ # Layer 6 (output head) contributes most; Layer 0 (input) contributes least.
161
+ ```
162
+
163
+ And here is a 4-block Transformer analyzed with `GradientNorm`, `BlockInfluence`, and `EffectiveRank`:
164
+
165
+ ```
166
+ Layer Type GradNorm BlockInfluence EffectiveRank
167
+ --------------------------------------------------------------------------------
168
+ blocks.0 transformer_block 0.1425 0.0278 94.47
169
+ blocks.1 transformer_block 0.1404 0.0275 94.21
170
+ blocks.2 transformer_block 0.1319 0.0265 93.92
171
+ blocks.3 transformer_block 0.1276 0.0269 93.66
172
+ ```
173
+
174
+ > Early blocks have slightly higher gradient norms — they are adapting more. BlockInfluence is low everywhere (all ~0.027) because residual connections dominate, meaning each block's transformation is small relative to the skip path. EffectiveRank is uniformly high (~94), indicating rich, non-degenerate representations.
175
+
176
+ ---
177
+
178
+ ## 13 Metrics in 7 Categories
179
+
180
+ | Category | Metrics | What it measures |
181
+ |---|---|---|
182
+ | **Optimization** | `GradientNorm`, `HessianTrace`, `FisherInformation` | How much the layer affects the loss landscape |
183
+ | **Spectral** | `CKA`, `EffectiveRank`, `NTKTrace` | Representation similarity, diversity, kernel influence |
184
+ | **Information Theory** | `ActivationEntropy`, `MutualInformation` | Information content and task relevance |
185
+ | **Representation** | `JacobianRank`, `BlockInfluence` | Expressiveness and layer redundancy |
186
+ | **Robustness** | `DropLayerRobustness` | Performance impact of removing the layer |
187
+ | **Bayesian** | `LaplacePosterior` | Parameter uncertainty (Laplace approximation) |
188
+ | **Architecture** | `AttentionFlow` | Attention entropy, head diversity (Transformers) |
189
+
190
+ Each metric returns a dict with a **primary key** (used for ranking) and optional secondary keys:
191
+
192
+ | Metric | Primary Key | Additional Keys |
193
+ |---|---|---|
194
+ | GradientNorm | `gradient_norm` | `gradient_norm_std`, `_max`, `_min` |
195
+ | HessianTrace | `hessian_trace` | `hessian_trace_std` |
196
+ | FisherInformation | `fisher_information` | `fisher_mean` |
197
+ | CKA | `cka_score` | |
198
+ | EffectiveRank | `effective_rank` | `stable_rank`, `rank_ratio` |
199
+ | NTKTrace | `ntk_trace` | `ntk_trace_per_param` |
200
+ | ActivationEntropy | `activation_entropy` | `activation_mean`, `_std`, `_sparsity` |
201
+ | MutualInformation | `mutual_information` | `mi_max`, `mi_std` |
202
+ | JacobianRank | `jacobian_rank` | `jacobian_rank_ratio`, `_condition`, `_max_sv` |
203
+ | BlockInfluence | `block_influence` | `block_similarity` |
204
+ | DropLayerRobustness | `droplayer_loss_increase` | `droplayer_loss_ratio` |
205
+ | LaplacePosterior | `laplace_posterior` | `laplace_posterior_std` |
206
+ | AttentionFlow | `attention_entropy` | `attention_max_weight`, `head_diversity`, `attention_distance` |
207
+
208
+ ---
209
+
210
+ ## Integration Bridges
211
+
212
+ ### Torch-Pruning
213
+
214
+ ```python
215
+ from uni_layer.integrations import TorchPruningBridge
216
+
217
+ bridge = TorchPruningBridge(model, contributions)
218
+
219
+ # Important layers get low pruning ratios, unimportant layers get high ratios
220
+ pruning_ratios = bridge.as_layer_pruning_ratios(
221
+ metric_name='gradient_norm', target_sparsity=0.5
222
+ )
223
+ protected = bridge.get_protected_layers(top_k=3)
224
+
225
+ # Use with torch-pruning
226
+ import torch_pruning as tp
227
+ pruner = tp.pruner.MetaPruner(
228
+ model, example_inputs,
229
+ importance=tp.importance.MagnitudeImportance(),
230
+ pruning_ratio_dict=pruning_ratios,
231
+ )
232
+ ```
233
+
234
+ ### HuggingFace PEFT
235
+
236
+ ```python
237
+ from uni_layer.integrations import HuggingFacePEFTBridge
238
+ from peft import LoraConfig, get_peft_model
239
+
240
+ bridge = HuggingFacePEFTBridge(model, contributions)
241
+
242
+ # Auto-select LoRA targets and adaptive rank
243
+ config_params = bridge.recommend_lora_config_params(metric_name='gradient_norm')
244
+ peft_model = get_peft_model(model, LoraConfig(**config_params))
245
+
246
+ # Or fine-grained control: different rank per layer
247
+ ranks = bridge.recommend_adaptive_ranks(base_rank=8, max_rank=64)
248
+ ```
249
+
250
+ ### Knowledge Distillation
251
+
252
+ ```python
253
+ from uni_layer.integrations import DistillationBridge
254
+
255
+ bridge = DistillationBridge(teacher, student, contributions)
256
+
257
+ pairs = bridge.recommend_layer_pairs(top_k=4) # teacher-student layer mapping
258
+ weights = bridge.recommend_layer_weights() # per-layer distillation weights
259
+ ```
260
+
261
+ ---
262
+
263
+ ## HuggingFace Model Support
264
+
265
+ Uni-Layer natively handles HuggingFace models that return dataclass/dict outputs, with automatic `attention_mask` injection:
266
+
267
+ ```python
268
+ from transformers import AutoModel
269
+ from uni_layer import LayerAnalyzer
270
+ from uni_layer.metrics import GradientNorm, BlockInfluence
271
+
272
+ model = AutoModel.from_pretrained("bert-base-uncased")
273
+ analyzer = LayerAnalyzer(model, task_type='classification')
274
+
275
+ # Just works -- dict outputs, attention_mask, labels all handled automatically
276
+ contributions = analyzer.compute_metrics(
277
+ metrics=[GradientNorm(), BlockInfluence()],
278
+ data_loader=tokenized_loader,
279
+ )
280
+ ```
281
+
282
+ ---
283
+
284
+ ## Examples
285
+
286
+ | Example | Model | File |
287
+ |---|---|---|
288
+ | ResNet layer analysis | ResNet-18 (CNN) | [`examples/resnet_layer_analysis.py`](examples/resnet_layer_analysis.py) |
289
+ | ViT attention analysis | Vision Transformer | [`examples/vit_layer_analysis.py`](examples/vit_layer_analysis.py) |
290
+ | BERT layer analysis + LoRA | BERT-style Transformer | [`examples/bert_layer_analysis.py`](examples/bert_layer_analysis.py) |
291
+ | Torch-Pruning integration | Any model | [`examples/integrate_torch_pruning.py`](examples/integrate_torch_pruning.py) |
292
+ | HuggingFace PEFT integration | Any model | [`examples/integrate_huggingface_peft.py`](examples/integrate_huggingface_peft.py) |
293
+ | Knowledge distillation | Teacher-Student | [`examples/integrate_distillation.py`](examples/integrate_distillation.py) |
294
+
295
+ ---
296
+
297
+ ## Installation
298
+
299
+ ```bash
300
+ pip install uni-layer # core
301
+ pip install uni-layer[integrations] # + torch-pruning, peft, transformers
302
+ pip install uni-layer[dev] # + pytest, black, flake8, mypy
303
+ pip install uni-layer[all] # everything
304
+ ```
305
+
306
+ From source:
307
+
308
+ ```bash
309
+ git clone https://github.com/GeoffreyWang1117/Uni-Layer.git
310
+ cd Uni-Layer && pip install -e ".[dev]"
311
+ ```
312
+
313
+ ---
314
+
315
+ ## Roadmap
316
+
317
+ ### v0.3.0 (Next)
318
+ - [ ] Diffusion model support (UNet timestep-aware analysis)
319
+ - [ ] Mamba / SSM architecture support
320
+ - [ ] MoE router layer analysis
321
+ - [ ] Residual-aware DropLayer metric (understand skip connections)
322
+ - [ ] Layer-to-layer CKA similarity matrix
323
+
324
+ ### v0.4.0
325
+ - [ ] GNN support (PyG MessagePassing layers)
326
+ - [ ] Multi-modal model branch analysis (vision encoder + language decoder)
327
+ - [ ] Wanda-style importance (weight × activation norm)
328
+ - [ ] IG-based sensitivity scoring (IGU-LoRA style)
329
+ - [ ] Export to ONNX / TensorRT optimization hints
330
+
331
+ ### v1.0.0
332
+ - [ ] Stable API with full backward compatibility
333
+ - [ ] Interactive web dashboard for layer analysis
334
+ - [ ] Distributed analysis for large models (FSDP/DeepSpeed)
335
+ - [ ] Pre-computed analysis for popular models (BERT, LLaMA, ViT, etc.)
336
+ - [ ] Academic paper and comprehensive benchmark suite
337
+
338
+ ---
339
+
340
+ ## Citation
341
+
342
+ ```bibtex
343
+ @software{unilayer2025,
344
+ title={Uni-Layer: A Universal Framework for Layer Contribution Analysis},
345
+ author={Geoffrey Wang},
346
+ year={2025},
347
+ url={https://github.com/GeoffreyWang1117/Uni-Layer}
348
+ }
349
+ ```
350
+
351
+ ## License
352
+
353
+ MIT License. See [LICENSE](LICENSE).
354
+
355
+ ---
356
+
357
+ <a id="中文说明"></a>
358
+
359
+ # 中文说明
360
+
361
+ ## Uni-Layer:神经网络层贡献度分析框架
362
+
363
+ **先理解你的层,再优化它们。**
364
+
365
+ Uni-Layer 是一个 PyTorch 工具库,通过 **7 大理论类别的 13 种指标** 为神经网络的每一层打分,告诉你哪些层最重要——从而实现更精准的剪枝、更高效的微调和更有效的蒸馏。
366
+
367
+ ### 核心优势
368
+
369
+ - **唯一的层重要性通用评分库**:Captum 做输入归因,Torch-Pruning 做剪枝,TransformerLens 做机制解释——只有 Uni-Layer 把 13 种层重要性指标统一到一个 API 中
370
+ - **与下游工具解耦**:通过 Bridge 模式无缝连接 Torch-Pruning / PEFT / 蒸馏框架
371
+ - **兼容 HuggingFace**:自动处理 dict/dataclass 输出、attention_mask、labels 透传
372
+
373
+ ### 快速开始
374
+
375
+ ```bash
376
+ pip install uni-layer
377
+ ```
378
+
379
+ ```python
380
+ from uni_layer import LayerAnalyzer
381
+ from uni_layer.metrics import GradientNorm, CKA, BlockInfluence
382
+
383
+ analyzer = LayerAnalyzer(model, task_type='classification')
384
+ contributions = analyzer.compute_metrics(
385
+ metrics=[GradientNorm(), CKA(), BlockInfluence()],
386
+ data_loader=train_loader,
387
+ )
388
+
389
+ # 按重要性排序
390
+ for name, score in analyzer.rank_layers(contributions, 'gradient_norm'):
391
+ print(f" {name}: {score:.4f}")
392
+ ```
393
+
394
+ ### 输出格式
395
+
396
+ `compute_metrics()` 返回结构化字典:
397
+
398
+ ```python
399
+ {
400
+ "layer_name": {
401
+ "layer_idx": 0, # 层索引
402
+ "layer_type": "linear", # 层类型
403
+ "gradient_norm": 0.0193, # 各指标值
404
+ "cka_score": 0.4161,
405
+ "block_influence": 1.0,
406
+ ...
407
+ },
408
+ ...
409
+ }
410
+ ```
411
+
412
+ `rank_layers()` 返回排序后的元组列表:
413
+
414
+ ```python
415
+ [("layer_6", 0.1094), ("layer_4", 0.0624), ...] # 降序
416
+ ```
417
+
418
+ ### 13 种指标
419
+
420
+ | 类别 | 指标 | 衡量内容 |
421
+ |---|---|---|
422
+ | 优化几何 | GradientNorm, HessianTrace, FisherInformation | 层对损失曲面的影响 |
423
+ | 谱方法 | CKA, EffectiveRank, NTKTrace | 表征相似性、多样性、核影响力 |
424
+ | 信息论 | ActivationEntropy, MutualInformation | 信息含量与任务相关性 |
425
+ | 表征结构 | JacobianRank, BlockInfluence | 表达能力与层冗余度 |
426
+ | 鲁棒性 | DropLayerRobustness | 移除该层后的性能损失 |
427
+ | 贝叶斯 | LaplacePosterior | 参数不确定性 |
428
+ | 架构特定 | AttentionFlow | 注意力熵、头多样性 (Transformer) |
429
+
430
+ ### 集成桥
431
+
432
+ ```python
433
+ # Torch-Pruning:重要层少剪,不重要层多剪
434
+ from uni_layer.integrations import TorchPruningBridge
435
+ bridge = TorchPruningBridge(model, contributions)
436
+ ratios = bridge.as_layer_pruning_ratios(target_sparsity=0.5)
437
+
438
+ # PEFT:自动选择 LoRA 目标层和自适应秩
439
+ from uni_layer.integrations import HuggingFacePEFTBridge
440
+ bridge = HuggingFacePEFTBridge(model, contributions)
441
+ config = bridge.recommend_lora_config_params()
442
+
443
+ # 蒸馏:教师-学生层配对和权重分配
444
+ from uni_layer.integrations import DistillationBridge
445
+ bridge = DistillationBridge(teacher, student, contributions)
446
+ pairs = bridge.recommend_layer_pairs(top_k=4)
447
+ ```
448
+
449
+ ### 路线图
450
+
451
+ **v0.3.0**:扩散模型支持 / Mamba-SSM / MoE 路由层分析 / 残差感知 DropLayer / 层间 CKA 矩阵
452
+
453
+ **v0.4.0**:GNN 支持 / 多模态分支分析 / Wanda 重要性 / IG 灵敏度 / ONNX 导出
454
+
455
+ **v1.0.0**:稳定 API / Web 可视化面板 / 分布式分析 / 预计算热门模型 / 学术论文
456
+
457
+ ### 许可证
458
+
459
+ MIT License。详见 [LICENSE](LICENSE)。