x-transformers 2.8.3.tar.gz → 2.8.4.tar.gz

This diff shows the changes between publicly available package versions as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (65)
  1. {x_transformers-2.8.3 → x_transformers-2.8.4}/PKG-INFO +1 -1
  2. {x_transformers-2.8.3 → x_transformers-2.8.4}/pyproject.toml +1 -1
  3. {x_transformers-2.8.3 → x_transformers-2.8.4}/tests/test_x_transformers.py +4 -1
  4. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/x_transformers.py +14 -0
  5. {x_transformers-2.8.3 → x_transformers-2.8.4}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.8.3 → x_transformers-2.8.4}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.8.3 → x_transformers-2.8.4}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.8.3 → x_transformers-2.8.4}/.gitignore +0 -0
  9. {x_transformers-2.8.3 → x_transformers-2.8.4}/LICENSE +0 -0
  10. {x_transformers-2.8.3 → x_transformers-2.8.4}/README.md +0 -0
  11. {x_transformers-2.8.3 → x_transformers-2.8.4}/data/README.md +0 -0
  12. {x_transformers-2.8.3 → x_transformers-2.8.4}/data/enwik8.gz +0 -0
  13. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/all-attention.png +0 -0
  14. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/deepnorm.png +0 -0
  17. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/fcm.png +0 -0
  23. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/ffglu.png +0 -0
  24. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/flash-attention.png +0 -0
  25. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/gate_values.png +0 -0
  26. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/gating.png +0 -0
  27. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/macaron-1.png +0 -0
  29. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/macaron-2.png +0 -0
  30. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/normformer.png +0 -0
  32. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/pia.png +0 -0
  33. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/resi_dual.png +0 -0
  35. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/residual_attn.png +0 -0
  36. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/rezero.png +0 -0
  37. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/rotary.png +0 -0
  38. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/sandwich.png +0 -0
  40. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/scalenorm.png +0 -0
  42. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/talking-heads.png +0 -0
  43. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/topk-attention.png +0 -0
  44. {x_transformers-2.8.3 → x_transformers-2.8.4}/images/xval.png +0 -0
  45. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_belief_state.py +0 -0
  46. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_copy.py +0 -0
  47. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_enwik8.py +0 -0
  49. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_gpt_vae.py +0 -0
  50. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_length_extrapolate.py +0 -0
  51. {x_transformers-2.8.3 → x_transformers-2.8.4}/train_parity.py +0 -0
  52. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/__init__.py +0 -0
  53. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/attend.py +0 -0
  54. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/autoregressive_wrapper.py +0 -0
  55. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/belief_state_wrapper.py +0 -0
  56. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/continuous.py +0 -0
  57. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/dpo.py +0 -0
  58. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/entropy_based_tokenizer.py +0 -0
  59. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/gpt_vae.py +0 -0
  60. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/multi_input.py +0 -0
  61. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/neo_mlp.py +0 -0
  62. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/nonautoregressive_wrapper.py +0 -0
  63. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/up_wrapper.py +0 -0
  64. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  65. {x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/xval.py +0 -0
{x_transformers-2.8.3 → x_transformers-2.8.4}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.8.3
+Version: 2.8.4
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
{x_transformers-2.8.3 → x_transformers-2.8.4}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.8.3"
+version = "2.8.4"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
{x_transformers-2.8.3 → x_transformers-2.8.4}/tests/test_x_transformers.py
@@ -1362,7 +1362,7 @@ def test_vae():
     out = model.generate(seq[:, :512], 512, seq_for_latents = style)
 
 def test_muon_params():
-    from x_transformers import Attention, FeedForward
+    from x_transformers import Attention, FeedForward, Encoder
 
     attn = Attention(dim = 512, dim_out = 384)
     assert len(list(attn.muon_parameters())) == 2
@@ -1370,3 +1370,6 @@ def test_muon_params():
     ff = FeedForward(dim = 512)
 
     assert len(list(ff.muon_parameters())) == 2
+
+    enc = Encoder(dim = 512, depth = 2)
+    assert len(enc.muon_parameters()) > 0
{x_transformers-2.8.3 → x_transformers-2.8.4}/x_transformers/x_transformers.py
@@ -2493,6 +2493,17 @@ class AttentionLayers(Module):
         for attn_layer, attn_inter in zip(attn_layers, attn_intermeds):
             attn_layer.qk_clip_(attn_inter, tau = tau)
 
+    def muon_parameters(self):
+        params = []
+
+        for m in self.modules():
+            if not isinstance(m, (Attention, FeedForward)):
+                continue
+
+            params.extend(list(m.muon_parameters()))
+
+        return params
+
     def forward(
         self,
         x,
@@ -3230,6 +3241,9 @@ class TransformerWrapper(Module):
     ):
         self.attn_layers.attn_qk_clip_(intermediates, tau = tau)
 
+    def muon_parameters(self):
+        return self.attn_layers.muon_parameters()
+
     def forward(
         self,
         x,
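
The only functional change in 2.8.4 is the new muon_parameters() accessor on AttentionLayers, forwarded by TransformerWrapper, which collects the parameters that the Attention and FeedForward blocks already expose through their own muon_parameters() methods, presumably the weight matrices a Muon-style optimizer would update while embeddings, norms, and the output head stay on a conventional optimizer. Below is a minimal usage sketch under that assumption; the model hyperparameters, the AdamW/Muon split, and the Muon optimizer itself are illustrative and not part of this release.

import torch
from x_transformers import TransformerWrapper, Encoder

# toy model; the hyperparameters are placeholders, not taken from the release
model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Encoder(dim = 512, depth = 6, heads = 8)
)

# parameters exposed by the new accessor (gathered from the Attention / FeedForward blocks)
muon_params = list(model.muon_parameters())
muon_param_ids = set(map(id, muon_params))

# everything else (token embedding, norms, output projection) goes to a conventional optimizer
adamw_params = [p for p in model.parameters() if id(p) not in muon_param_ids]
adamw_opt = torch.optim.AdamW(adamw_params, lr = 3e-4)

# muon_params would then be handed to whatever Muon implementation you use, e.g.
# muon_opt = Muon(muon_params, lr = 2e-2)  # hypothetical; no Muon optimizer ships with x-transformers

Splitting by parameter identity keeps the two optimizer groups disjoint, so no tensor is updated twice.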