x-transformers 2.8.2__tar.gz → 2.8.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. {x_transformers-2.8.2 → x_transformers-2.8.4}/PKG-INFO +22 -1
  2. {x_transformers-2.8.2 → x_transformers-2.8.4}/README.md +21 -0
  3. {x_transformers-2.8.2 → x_transformers-2.8.4}/pyproject.toml +1 -1
  4. {x_transformers-2.8.2 → x_transformers-2.8.4}/tests/test_x_transformers.py +13 -0
  5. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/x_transformers.py +33 -5
  6. {x_transformers-2.8.2 → x_transformers-2.8.4}/.github/FUNDING.yml +0 -0
  7. {x_transformers-2.8.2 → x_transformers-2.8.4}/.github/workflows/python-publish.yml +0 -0
  8. {x_transformers-2.8.2 → x_transformers-2.8.4}/.github/workflows/python-test.yaml +0 -0
  9. {x_transformers-2.8.2 → x_transformers-2.8.4}/.gitignore +0 -0
  10. {x_transformers-2.8.2 → x_transformers-2.8.4}/LICENSE +0 -0
  11. {x_transformers-2.8.2 → x_transformers-2.8.4}/data/README.md +0 -0
  12. {x_transformers-2.8.2 → x_transformers-2.8.4}/data/enwik8.gz +0 -0
  13. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/all-attention.png +0 -0
  14. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/deepnorm.png +0 -0
  17. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/fcm.png +0 -0
  23. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/ffglu.png +0 -0
  24. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/flash-attention.png +0 -0
  25. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/gate_values.png +0 -0
  26. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/gating.png +0 -0
  27. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/macaron-1.png +0 -0
  29. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/macaron-2.png +0 -0
  30. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/normformer.png +0 -0
  32. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/pia.png +0 -0
  33. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/resi_dual.png +0 -0
  35. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/residual_attn.png +0 -0
  36. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/rezero.png +0 -0
  37. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/rotary.png +0 -0
  38. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/sandwich.png +0 -0
  40. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/scalenorm.png +0 -0
  42. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/talking-heads.png +0 -0
  43. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/topk-attention.png +0 -0
  44. {x_transformers-2.8.2 → x_transformers-2.8.4}/images/xval.png +0 -0
  45. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_belief_state.py +0 -0
  46. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_copy.py +0 -0
  47. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_enwik8.py +0 -0
  49. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_gpt_vae.py +0 -0
  50. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_length_extrapolate.py +0 -0
  51. {x_transformers-2.8.2 → x_transformers-2.8.4}/train_parity.py +0 -0
  52. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/__init__.py +0 -0
  53. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/attend.py +0 -0
  54. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/autoregressive_wrapper.py +0 -0
  55. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/belief_state_wrapper.py +0 -0
  56. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/continuous.py +0 -0
  57. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/dpo.py +0 -0
  58. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/entropy_based_tokenizer.py +0 -0
  59. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/gpt_vae.py +0 -0
  60. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/multi_input.py +0 -0
  61. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/neo_mlp.py +0 -0
  62. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/nonautoregressive_wrapper.py +0 -0
  63. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/up_wrapper.py +0 -0
  64. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  65. {x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/xval.py +0 -0
{x_transformers-2.8.2 → x_transformers-2.8.4}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: x-transformers
- Version: 2.8.2
+ Version: 2.8.4
  Summary: X-Transformers
  Project-URL: Homepage, https://pypi.org/project/x-transformers/
  Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -2552,4 +2552,25 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
  }
  ```

+ ```bibtex
+ @misc{jordan2024muon,
+     author = {Keller Jordan and Yuchen Jin and Vlado Boza and Jiacheng You and Franz Cesista and Laker Newhouse and Jeremy Bernstein},
+     title = {Muon: An optimizer for hidden layers in neural networks},
+     year = {2024},
+     url = {https://kellerjordan.github.io/posts/muon/}
+ }
+ ```
+
+ ```bibtex
+ @misc{wang2025muonoutperformsadamtailend,
+     title = {Muon Outperforms Adam in Tail-End Associative Memory Learning},
+     author = {Shuche Wang and Fengzhuo Zhang and Jiaxiang Li and Cunxiao Du and Chao Du and Tianyu Pang and Zhuoran Yang and Mingyi Hong and Vincent Y. F. Tan},
+     year = {2025},
+     eprint = {2509.26030},
+     archivePrefix = {arXiv},
+     primaryClass = {cs.LG},
+     url = {https://arxiv.org/abs/2509.26030},
+ }
+ ```
+
  *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.8.2 → x_transformers-2.8.4}/README.md
@@ -2504,4 +2504,25 @@ ids_out, num_out, is_number_mask = model.generate(start_ids, start_nums, 17)
  }
  ```

+ ```bibtex
+ @misc{jordan2024muon,
+     author = {Keller Jordan and Yuchen Jin and Vlado Boza and Jiacheng You and Franz Cesista and Laker Newhouse and Jeremy Bernstein},
+     title = {Muon: An optimizer for hidden layers in neural networks},
+     year = {2024},
+     url = {https://kellerjordan.github.io/posts/muon/}
+ }
+ ```
+
+ ```bibtex
+ @misc{wang2025muonoutperformsadamtailend,
+     title = {Muon Outperforms Adam in Tail-End Associative Memory Learning},
+     author = {Shuche Wang and Fengzhuo Zhang and Jiaxiang Li and Cunxiao Du and Chao Du and Tianyu Pang and Zhuoran Yang and Mingyi Hong and Vincent Y. F. Tan},
+     year = {2025},
+     eprint = {2509.26030},
+     archivePrefix = {arXiv},
+     primaryClass = {cs.LG},
+     url = {https://arxiv.org/abs/2509.26030},
+ }
+ ```
+
  *solve intelligence... then use that to solve everything else.* - Demis Hassabis
{x_transformers-2.8.2 → x_transformers-2.8.4}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "x-transformers"
- version = "2.8.2"
+ version = "2.8.4"
  description = "X-Transformers"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
{x_transformers-2.8.2 → x_transformers-2.8.4}/tests/test_x_transformers.py
@@ -1360,3 +1360,16 @@ def test_vae():
      style = torch.randint(0, 256, (1, 1024))

      out = model.generate(seq[:, :512], 512, seq_for_latents = style)
+
+ def test_muon_params():
+     from x_transformers import Attention, FeedForward, Encoder
+
+     attn = Attention(dim = 512, dim_out = 384)
+     assert len(list(attn.muon_parameters())) == 2
+
+     ff = FeedForward(dim = 512)
+
+     assert len(list(ff.muon_parameters())) == 2
+
+     enc = Encoder(dim = 512, depth = 2)
+     assert len(enc.muon_parameters()) > 0
{x_transformers-2.8.2 → x_transformers-2.8.4}/x_transformers/x_transformers.py
@@ -4,6 +4,11 @@ from typing import Callable
  import math
  from copy import deepcopy
  from random import random, randrange
+ from functools import partial, wraps
+ from itertools import chain
+ from collections import namedtuple
+ from contextlib import nullcontext
+ from dataclasses import dataclass
  from packaging import version

  import torch
@@ -13,11 +18,6 @@ from torch import nn, einsum, tensor, Tensor, cat, stack, arange, is_tensor
  from torch.utils._pytree import tree_flatten, tree_unflatten, tree_map
  from torch.nn import Module, ModuleList, ModuleDict

- from functools import partial, wraps
- from collections import namedtuple
- from contextlib import nullcontext
- from dataclasses import dataclass
-
  from loguru import logger

  from x_transformers.attend import Attend, Intermediates
@@ -1279,6 +1279,17 @@ class FeedForward(Module):
          if zero_init_output:
              init_zero_(proj_out)

+     def muon_parameters(self):
+         weights = []
+
+         for m in self.modules():
+             if not isinstance(m, nn.Linear):
+                 continue
+
+             weights.append(m.weight)
+
+         return weights
+
      def forward(
          self,
          x,
@@ -1644,6 +1655,9 @@ class Attention(Module):
          q_weight.mul_(qk_weight_scale)
          k_weight.mul_(qk_weight_scale)

+     def muon_parameters(self):
+         return chain(self.to_v.parameters(), self.to_out.parameters())
+
      def forward(
          self,
          x,
@@ -2479,6 +2493,17 @@ class AttentionLayers(Module):
          for attn_layer, attn_inter in zip(attn_layers, attn_intermeds):
              attn_layer.qk_clip_(attn_inter, tau = tau)

+     def muon_parameters(self):
+         params = []
+
+         for m in self.modules():
+             if not isinstance(m, (Attention, FeedForward)):
+                 continue
+
+             params.extend(list(m.muon_parameters()))
+
+         return params
+
      def forward(
          self,
          x,
@@ -3216,6 +3241,9 @@ class TransformerWrapper(Module):
      ):
          self.attn_layers.attn_qk_clip_(intermediates, tau = tau)

+     def muon_parameters(self):
+         return self.attn_layers.muon_parameters()
+
      def forward(
          self,
          x,
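
Taken together, these hunks add a `muon_parameters()` hook at every level (`FeedForward`, `Attention`, `AttentionLayers`, `TransformerWrapper`) that surfaces the hidden-layer linear weights a Muon-style optimizer targets. Below is a minimal sketch of how the new hooks could be used to split parameters between Muon and AdamW; the `Muon` constructor is a stand-in for whichever implementation you use (it is not shipped with x-transformers), and the learning rates are illustrative only.

```python
import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 512, depth = 6, heads = 8)
)

# hidden-layer weights surfaced by the new hook:
# attention to_v / to_out projections plus the feedforward linears
muon_params = list(model.muon_parameters())

# everything else (embeddings, norms, to_q / to_k, output head) stays on AdamW
muon_ids = set(map(id, muon_params))
adamw_params = [p for p in model.parameters() if id(p) not in muon_ids]

# `Muon` is assumed to follow the usual torch.optim constructor convention;
# swap in your preferred implementation here
# muon_opt = Muon(muon_params, lr = 2e-2)
adamw_opt = torch.optim.AdamW(adamw_params, lr = 3e-4)
```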