x-transformers 2.0.2__py3-none-any.whl → 2.0.3__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their public registry. It is provided for informational purposes only.
@@ -1845,6 +1845,7 @@ class AttentionLayers(Module):
         rotary_interpolation_factor = 1.,
         rotary_xpos_scale_base = 512,
         rotary_base_rescale_factor = 1.,
+        rotate_num_heads = None,
         weight_tie_layers = False,
         custom_layers: tuple[str, ...] | None = None,
         layers_execute_order: tuple[int, ...] | None = None,
@@ -2147,7 +2148,7 @@ class AttentionLayers(Module):
 
             if layer_type == 'a':
                 self_attn_learned_value_residual = learned_value_residual_mix and not is_first_self_attn
-                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receive_diff_residuals, learned_value_residual_mix = self_attn_learned_value_residual, **attn_kwargs)
+                layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receive_diff_residuals, learned_value_residual_mix = self_attn_learned_value_residual, rotate_num_heads = rotate_num_heads, **attn_kwargs)
                 is_first_self_attn = False
             elif layer_type == 'c':
                 layer = Attention(dim, heads = heads, **{**attn_kwargs, **cross_attn_kwargs})
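The hunks above only thread the new `rotate_num_heads` keyword from `AttentionLayers` down into `Attention`; the diff does not show how `Attention` consumes it. The idea, following the "Rope to Nope and Back Again" citation added further down, is a hybrid scheme in which rotary position embeddings are applied to only a subset of the attention heads while the remaining heads stay position-free. A minimal sketch of that mechanism, assuming a `(batch, heads, seq, dim_head)` layout and a rotate-the-first-N-heads convention (the helper names and the split convention are illustrative, not the library's actual internals):

```python
import torch

def rotate_half(x):
    # standard RoPE helper: pair up channels, swap and negate
    x1, x2 = x.chunk(2, dim = -1)
    return torch.cat((-x2, x1), dim = -1)

def apply_rotary(t, freqs):
    # t: (batch, heads, seq, dim_head), freqs: (seq, dim_head)
    return t * freqs.cos() + rotate_half(t) * freqs.sin()

def partial_head_rotary(q, k, freqs, rotate_num_heads):
    # hypothetical helper: rotate only the first `rotate_num_heads` heads,
    # leaving the remaining heads without positional information (NoPE)
    rq, nq = q[:, :rotate_num_heads], q[:, rotate_num_heads:]
    rk, nk = k[:, :rotate_num_heads], k[:, rotate_num_heads:]
    rq, rk = apply_rotary(rq, freqs), apply_rotary(rk, freqs)
    return torch.cat((rq, nq), dim = 1), torch.cat((rk, nk), dim = 1)
```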
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.0.2
+Version: 2.0.3
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -950,7 +950,8 @@ model_xl = TransformerWrapper(
         dim = 512,
         depth = 6,
         heads = 8,
-        rotary_pos_emb = True
+        rotary_pos_emb = True,
+        rotate_num_heads = 4 # only rotate 4 out of the 8 attention heads
     )
 )
 
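The README fragment above sits inside a larger example; a complete, runnable instantiation (the surrounding `TransformerWrapper` / `Decoder` boilerplate and the token/sequence sizes are reconstructed from the library's other README examples, so treat them as illustrative) would look like:

```python
import torch
from x_transformers import TransformerWrapper, Decoder

model_xl = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        rotary_pos_emb = True,
        rotate_num_heads = 4  # only rotate 4 out of the 8 attention heads
    )
)

x = torch.randint(0, 20000, (1, 1024))
logits = model_xl(x)  # (1, 1024, 20000)
```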
@@ -1839,6 +1840,15 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
 }
 ```
 
+```bibtex
+@inproceedings{Yang2025RopeTN,
+    title  = {Rope to Nope and Back Again: A New Hybrid Attention Strategy},
+    author = {Bowen Yang and Bharat Venkitesh and Dwarak Talupuru and Hangyu Lin and David Cairuz and Phil Blunsom and Acyr F. Locatelli},
+    year   = {2025},
+    url    = {https://api.semanticscholar.org/CorpusID:276079501}
+}
+```
+
 ```bibtex
 @inproceedings{Chen2023ExtendingCW,
     title = {Extending Context Window of Large Language Models via Positional Interpolation},
@@ -6,10 +6,10 @@ x_transformers/dpo.py,sha256=xt4OuOWhU8pN3OKN2LZAaC2NC8iiEnchqqcrPWVqf0o,3521
 x_transformers/multi_input.py,sha256=tCh-fTJDj2ib4SMGtsa-AM8MxKzJAQSwqAXOu3HU2mg,9252
 x_transformers/neo_mlp.py,sha256=XCNnnop9WLarcxap1kGuYc1x8GHvwkZiDRnXOxSl3Po,3452
 x_transformers/nonautoregressive_wrapper.py,sha256=2NU58hYMgn-4Jzg3mie-mXb0XH_dCN7fjlzd3K1rLUY,10510
-x_transformers/x_transformers.py,sha256=1s8KCSfHXMN9TKLFdS-RzzCskBDkh4CuBk2_XRb6IXk,107537
+x_transformers/x_transformers.py,sha256=DV4yUBDarEPwNxXr-DqqDpWuEv6YhydjyNzmYqJXN6Q,107607
 x_transformers/xl_autoregressive_wrapper.py,sha256=CvZMJ6A6PA-Y_bQAhnORwjJBSl6Vjq2IdW5KTdk8NI8,4195
 x_transformers/xval.py,sha256=7S00kCuab4tWQa-vf-z-XfzADjVj48MoFIr7VSIvttg,8575
-x_transformers-2.0.2.dist-info/METADATA,sha256=tNdI3H2S4HnnGK1hPY3l94FoXH3SB9vGAb55pcah6Yw,86506
-x_transformers-2.0.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-x_transformers-2.0.2.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
-x_transformers-2.0.2.dist-info/RECORD,,
+x_transformers-2.0.3.dist-info/METADATA,sha256=ej7Q0_Kg9oalvVsUcIPHv_6msldGcQuJi6t0NkJA1AI,86938
+x_transformers-2.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+x_transformers-2.0.3.dist-info/licenses/LICENSE,sha256=As9u198X-U-vph5noInuUfqsAG2zX_oXPHDmdjwlPPY,1066
+x_transformers-2.0.3.dist-info/RECORD,,
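An aside on reading the RECORD entries above: each line is `path,sha256=<digest>,<size-in-bytes>`, where the digest is the urlsafe base64 encoding of the raw SHA-256 hash with trailing `=` padding stripped (the wheel format of PEP 427, following PEP 376), not hex. A minimal sketch for reproducing one entry from an unpacked wheel:

```python
import base64, hashlib

def record_entry(path):
    # reproduce a wheel RECORD line: path, urlsafe-b64 sha256 (no padding), size
    with open(path, 'rb') as f:
        data = f.read()
    digest = base64.urlsafe_b64encode(hashlib.sha256(data).digest()).rstrip(b'=').decode()
    return f'{path},sha256={digest},{len(data)}'

# e.g. record_entry('x_transformers/xval.py')
```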