x_transformers-2.0.2-py3-none-any.whl → x_transformers-2.0.3-py3-none-any.whl
- x_transformers/x_transformers.py +2 -1
- {x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/METADATA +12 -2
- {x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/RECORD +5 -5
- {x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/WHEEL +0 -0
- {x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/licenses/LICENSE +0 -0
x_transformers/x_transformers.py
CHANGED
@@ -1845,6 +1845,7 @@ class AttentionLayers(Module):
         rotary_interpolation_factor = 1.,
         rotary_xpos_scale_base = 512,
         rotary_base_rescale_factor = 1.,
+        rotate_num_heads = None,
         weight_tie_layers = False,
         custom_layers: tuple[str, ...] | None = None,
         layers_execute_order: tuple[int, ...] | None = None,
@@ -2147,7 +2148,7 @@ class AttentionLayers(Module):

             if layer_type == 'a':
                 self_attn_learned_value_residual = learned_value_residual_mix and not is_first_self_attn
-                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receive_diff_residuals, learned_value_residual_mix = self_attn_learned_value_residual, **attn_kwargs)
+                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receive_diff_residuals, learned_value_residual_mix = self_attn_learned_value_residual, rotate_num_heads = rotate_num_heads, **attn_kwargs)
                 is_first_self_attn = False
             elif layer_type == 'c':
                 layer = Attention(dim, heads = heads, **{**attn_kwargs, **cross_attn_kwargs})
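In effect, the new `rotate_num_heads` argument lets rotary position embeddings be applied to only a subset of the attention heads, with the remaining heads left unrotated (the hybrid RoPE/NoPE strategy cited in the BibTeX entry added below). A minimal usage sketch, mirroring the README example updated in this release; the `num_tokens` and `max_seq_len` values are illustrative placeholders, not part of the diff:

```python
import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 20000,      # placeholder vocab size
    max_seq_len = 1024,      # placeholder context length
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        rotary_pos_emb = True,
        rotate_num_heads = 4 # new in 2.0.3: rotary applied to only 4 of the 8 heads
    )
)

x = torch.randint(0, 20000, (1, 1024))
logits = model(x)            # (1, 1024, 20000)
```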
{x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/METADATA
CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.0.2
+Version: 2.0.3
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -950,7 +950,8 @@ model_xl = TransformerWrapper(
         dim = 512,
         depth = 6,
         heads = 8,
-        rotary_pos_emb = True
+        rotary_pos_emb = True,
+        rotate_num_heads = 4 # only rotate 4 out of the 8 attention heads
     )
 )

@@ -1839,6 +1840,15 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```

+```bibtex
+@inproceedings{Yang2025RopeTN,
+    title = {Rope to Nope and Back Again: A New Hybrid Attention Strategy},
+    author = {Bowen Yang and Bharat Venkitesh and Dwarak Talupuru and Hangyu Lin and David Cairuz and Phil Blunsom and Acyr F. Locatelli},
+    year = {2025},
+    url = {https://api.semanticscholar.org/CorpusID:276079501}
+}
+```
+
 ```bibtex
 @inproceedings{Chen2023ExtendingCW,
     title = {Extending Context Window of Large Language Models via Positional Interpolation},
{x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/RECORD
CHANGED
@@ -6,10 +6,10 @@ x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
-x_transformers/x_transformers.py,sha256=
+x_transformers/x_transformers.py,sha256=DV4yUBDarEPwNxXr-DqqDpWuEv6YhydjyNzmYqJXN6Q,107607
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
-x_transformers-2.0.
-x_transformers-2.0.
-x_transformers-2.0.
-x_transformers-2.0.
+x_transformers-2.0.3.dist-info/METADATA,sha256=ej7Q0_Kg9oalvVsUcIPHv_6msldGcQuJi6t0NkJA1AI,86938
+x_transformers-2.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.0.3.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.0.3.dist-info/RECORD,,
{x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/WHEEL
File without changes
{x_transformers-2.0.2.dist-info → x_transformers-2.0.3.dist-info}/licenses/LICENSE
File without changes