x-transformers 2.0.2__tar.gz → 2.0.3__tar.gz

Files changed (55)
  1. {x_transformers-2.0.2 → x_transformers-2.0.3}/PKG-INFO +12 -2
  2. {x_transformers-2.0.2 → x_transformers-2.0.3}/README.md +11 -1
  3. {x_transformers-2.0.2 → x_transformers-2.0.3}/pyproject.toml +1 -1
  4. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/x_transformers.py +2 -1
  5. {x_transformers-2.0.2 → x_transformers-2.0.3}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.0.2 → x_transformers-2.0.3}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.0.2 → x_transformers-2.0.3}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.0.2 → x_transformers-2.0.3}/.gitignore +0 -0
  9. {x_transformers-2.0.2 → x_transformers-2.0.3}/LICENSE +0 -0
  10. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/all-attention.png +0 -0
  11. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/attention-on-attention.png +0 -0
  12. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/cosine-sim-attention.png +0 -0
  13. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/deepnorm.png +0 -0
  14. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/dynamic-pos-bias-linear.png +0 -0
  15. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/dynamic-pos-bias-log.png +0 -0
  16. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  17. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/dynamic-pos-bias.png +0 -0
  18. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/enhanced-recurrence.png +0 -0
  19. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/fcm.png +0 -0
  20. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/ffglu.png +0 -0
  21. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/flash-attention.png +0 -0
  22. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/gate_values.png +0 -0
  23. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/gating.png +0 -0
  24. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/length-extrapolation-scale.png +0 -0
  25. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/macaron-1.png +0 -0
  26. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/macaron-2.png +0 -0
  27. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/memory-transformer.png +0 -0
  28. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/normformer.png +0 -0
  29. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/pia.png +0 -0
  30. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/qknorm-analysis.png +0 -0
  31. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/resi_dual.png +0 -0
  32. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/residual_attn.png +0 -0
  33. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/rezero.png +0 -0
  34. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/rotary.png +0 -0
  35. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/sandwich-2.png +0 -0
  36. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/sandwich.png +0 -0
  37. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/sandwich_norm.png +0 -0
  38. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/scalenorm.png +0 -0
  39. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/talking-heads.png +0 -0
  40. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/topk-attention.png +0 -0
  41. {x_transformers-2.0.2 → x_transformers-2.0.3}/images/xval.png +0 -0
  42. {x_transformers-2.0.2 → x_transformers-2.0.3}/tests/test_x_transformers.py +0 -0
  43. {x_transformers-2.0.2 → x_transformers-2.0.3}/train_copy.py +0 -0
  44. {x_transformers-2.0.2 → x_transformers-2.0.3}/train_enwik8.py +0 -0
  45. {x_transformers-2.0.2 → x_transformers-2.0.3}/train_parity.py +0 -0
  46. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/__init__.py +0 -0
  47. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/attend.py +0 -0
  48. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/autoregressive_wrapper.py +0 -0
  49. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/continuous.py +0 -0
  50. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/dpo.py +0 -0
  51. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/multi_input.py +0 -0
  52. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/neo_mlp.py +0 -0
  53. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/nonautoregressive_wrapper.py +0 -0
  54. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  55. {x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/xval.py +0 -0
{x_transformers-2.0.2 → x_transformers-2.0.3}/PKG-INFO

@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: x-transformers
- Version: 2.0.2
+ Version: 2.0.3
  Summary: X-Transformers
  Project-URL: Homepage, https://pypi.org/project/x-transformers/
  Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -950,7 +950,8 @@ model_xl = TransformerWrapper(
          dim = 512,
          depth = 6,
          heads = 8,
-         rotary_pos_emb = True
+         rotary_pos_emb = True,
+         rotate_num_heads = 4 # only rotate 4 out of the 8 attention heads
      )
  )

@@ -1839,6 +1840,15 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
  }
  ```

+ ```bibtex
+ @inproceedings{Yang2025RopeTN,
+     title = {Rope to Nope and Back Again: A New Hybrid Attention Strategy},
+     author = {Bowen Yang and Bharat Venkitesh and Dwarak Talupuru and Hangyu Lin and David Cairuz and Phil Blunsom and Acyr F. Locatelli},
+     year = {2025},
+     url = {https://api.semanticscholar.org/CorpusID:276079501}
+ }
+ ```
+
  ```bibtex
  @inproceedings{Chen2023ExtendingCW,
      title = {Extending Context Window of Large Language Models via Positional Interpolation},
{x_transformers-2.0.2 → x_transformers-2.0.3}/README.md

@@ -901,7 +901,8 @@ model_xl = TransformerWrapper(
          dim = 512,
          depth = 6,
          heads = 8,
-         rotary_pos_emb = True
+         rotary_pos_emb = True,
+         rotate_num_heads = 4 # only rotate 4 out of the 8 attention heads
      )
  )

@@ -1790,6 +1791,15 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
  }
  ```

+ ```bibtex
+ @inproceedings{Yang2025RopeTN,
+     title = {Rope to Nope and Back Again: A New Hybrid Attention Strategy},
+     author = {Bowen Yang and Bharat Venkitesh and Dwarak Talupuru and Hangyu Lin and David Cairuz and Phil Blunsom and Acyr F. Locatelli},
+     year = {2025},
+     url = {https://api.semanticscholar.org/CorpusID:276079501}
+ }
+ ```
+
  ```bibtex
  @inproceedings{Chen2023ExtendingCW,
      title = {Extending Context Window of Large Language Models via Positional Interpolation},
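
The README hunk above shows only the tail of the rotary example. For context, here is a minimal, self-contained usage sketch of the new option with the standard `TransformerWrapper` / `Decoder` constructor; the surrounding arguments are assumed from the existing README examples rather than taken from this diff:

```python
import torch
from x_transformers import TransformerWrapper, Decoder

# hybrid RoPE / NoPE across heads: only 4 of the 8 attention heads receive
# rotary position embeddings, the remaining heads attend without positions
model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        rotary_pos_emb = True,
        rotate_num_heads = 4   # new in 2.0.3
    )
)

x = torch.randint(0, 20000, (1, 1024))
logits = model(x)   # (1, 1024, 20000)
```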
{x_transformers-2.0.2 → x_transformers-2.0.3}/pyproject.toml

@@ -1,6 +1,6 @@
  [project]
  name = "x-transformers"
- version = "2.0.2"
+ version = "2.0.3"
  description = "X-Transformers"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
{x_transformers-2.0.2 → x_transformers-2.0.3}/x_transformers/x_transformers.py

@@ -1845,6 +1845,7 @@ class AttentionLayers(Module):
      rotary_interpolation_factor = 1.,
      rotary_xpos_scale_base = 512,
      rotary_base_rescale_factor = 1.,
+     rotate_num_heads = None,
      weight_tie_layers = False,
      custom_layers: tuple[str, ...] | None = None,
      layers_execute_order: tuple[int, ...] | None = None,
@@ -2147,7 +2148,7 @@ class AttentionLayers(Module):

      if layer_type == 'a':
          self_attn_learned_value_residual = learned_value_residual_mix and not is_first_self_attn
-         layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receive_diff_residuals, learned_value_residual_mix = self_attn_learned_value_residual, **attn_kwargs)
+         layer = Attention(dim, heads = heads, causal = causal, qkv_receive_diff_residuals = qkv_receive_diff_residuals, learned_value_residual_mix = self_attn_learned_value_residual, rotate_num_heads = rotate_num_heads, **attn_kwargs)
          is_first_self_attn = False
      elif layer_type == 'c':
          layer = Attention(dim, heads = heads, **{**attn_kwargs, **cross_attn_kwargs})
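
The hunk above only threads the new `rotate_num_heads` argument from `AttentionLayers` into `Attention`; the attention-side handling itself is not part of this diff. As an illustration only, not the library's actual implementation, rotating a subset of heads could look like the following standalone sketch, where `rotate_half`, `apply_rotary`, and `partial_head_rotary` are hypothetical helper names:

```python
import torch

def rotate_half(x):
    # split the last dimension in two and rotate: (x1, x2) -> (-x2, x1)
    x1, x2 = x.chunk(2, dim = -1)
    return torch.cat((-x2, x1), dim = -1)

def apply_rotary(t, freqs):
    # standard rotary embedding on a (batch, heads, seq, dim_head) tensor
    return t * freqs.cos() + rotate_half(t) * freqs.sin()

def partial_head_rotary(q, k, freqs, rotate_num_heads = None):
    # rotate only the first `rotate_num_heads` heads; the remaining heads
    # receive no positional information (hybrid RoPE / NoPE across heads)
    if rotate_num_heads is None:
        return apply_rotary(q, freqs), apply_rotary(k, freqs)

    q_rot, q_pass = q[:, :rotate_num_heads], q[:, rotate_num_heads:]
    k_rot, k_pass = k[:, :rotate_num_heads], k[:, rotate_num_heads:]

    q_rot, k_rot = apply_rotary(q_rot, freqs), apply_rotary(k_rot, freqs)

    return torch.cat((q_rot, q_pass), dim = 1), torch.cat((k_rot, k_pass), dim = 1)

# toy shapes: batch 2, 8 heads, 1024 tokens, head dimension 64
q = torch.randn(2, 8, 1024, 64)
k = torch.randn(2, 8, 1024, 64)

# sinusoidal frequencies per position, broadcast over batch and heads
pos = torch.arange(1024).float()
inv_freq = 1. / (10000 ** (torch.arange(0, 32).float() / 32))
freqs = torch.einsum('i,j->ij', pos, inv_freq)
freqs = torch.cat((freqs, freqs), dim = -1)  # (1024, 64)

q, k = partial_head_rotary(q, k, freqs, rotate_num_heads = 4)
```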