x-transformers 2.7.5.tar.gz → 2.7.6.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {x_transformers-2.7.5 → x_transformers-2.7.6}/PKG-INFO +1 -1
  2. {x_transformers-2.7.5 → x_transformers-2.7.6}/pyproject.toml +1 -1
  3. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/x_transformers.py +7 -4
  4. {x_transformers-2.7.5 → x_transformers-2.7.6}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.7.5 → x_transformers-2.7.6}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.7.5 → x_transformers-2.7.6}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.7.5 → x_transformers-2.7.6}/.gitignore +0 -0
  8. {x_transformers-2.7.5 → x_transformers-2.7.6}/LICENSE +0 -0
  9. {x_transformers-2.7.5 → x_transformers-2.7.6}/README.md +0 -0
  10. {x_transformers-2.7.5 → x_transformers-2.7.6}/data/README.md +0 -0
  11. {x_transformers-2.7.5 → x_transformers-2.7.6}/data/enwik8.gz +0 -0
  12. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/all-attention.png +0 -0
  13. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/deepnorm.png +0 -0
  16. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/fcm.png +0 -0
  22. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/ffglu.png +0 -0
  23. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/flash-attention.png +0 -0
  24. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/gate_values.png +0 -0
  25. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/gating.png +0 -0
  26. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/macaron-1.png +0 -0
  28. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/macaron-2.png +0 -0
  29. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/normformer.png +0 -0
  31. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/pia.png +0 -0
  32. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/resi_dual.png +0 -0
  34. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/residual_attn.png +0 -0
  35. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/rezero.png +0 -0
  36. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/rotary.png +0 -0
  37. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/sandwich.png +0 -0
  39. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/scalenorm.png +0 -0
  41. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/talking-heads.png +0 -0
  42. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/topk-attention.png +0 -0
  43. {x_transformers-2.7.5 → x_transformers-2.7.6}/images/xval.png +0 -0
  44. {x_transformers-2.7.5 → x_transformers-2.7.6}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.7.5 → x_transformers-2.7.6}/train_belief_state.py +0 -0
  46. {x_transformers-2.7.5 → x_transformers-2.7.6}/train_copy.py +0 -0
  47. {x_transformers-2.7.5 → x_transformers-2.7.6}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.7.5 → x_transformers-2.7.6}/train_enwik8.py +0 -0
  49. {x_transformers-2.7.5 → x_transformers-2.7.6}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.7.5 → x_transformers-2.7.6}/train_parity.py +0 -0
  51. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/continuous.py +0 -0
  56. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/entropy_based_tokenizer.py +0 -0
  58. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/multi_input.py +0 -0
  59. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/neo_mlp.py +0 -0
  60. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/nonautoregressive_wrapper.py +0 -0
  61. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/up_wrapper.py +0 -0
  62. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  63. {x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/xval.py +0 -0
{x_transformers-2.7.5 → x_transformers-2.7.6}/PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: x-transformers
- Version: 2.7.5
+ Version: 2.7.6
  Summary: X-Transformers
  Project-URL: Homepage, https://pypi.org/project/x-transformers/
  Project-URL: Repository, https://github.com/lucidrains/x-transformers
{x_transformers-2.7.5 → x_transformers-2.7.6}/pyproject.toml
@@ -1,6 +1,6 @@
  [project]
  name = "x-transformers"
- version = "2.7.5"
+ version = "2.7.6"
  description = "X-Transformers"
  authors = [
      { name = "Phil Wang", email = "lucidrains@gmail.com" }
{x_transformers-2.7.5 → x_transformers-2.7.6}/x_transformers/x_transformers.py
@@ -2469,12 +2469,15 @@ class AttentionLayers(Module):
      ):
          # pairs up the attention intermediates with each attention module and does qk clip proposed by kimi team

-         for (_, layer, _), layer_type, attn_inter in zip(self.layers, self.layer_types, intermediates.attn_intermediates):
+         layer_and_layer_types = (self.layers, self.layer_types)

-             if layer_type not in ('a', 'c'):
-                 continue
+         attn_layers = [layer for (_, layer, _), layer_type in zip(self.layers, self.layer_types) if layer_type in ('a', 'c')]
+         attn_intermeds = intermediates.attn_intermediates
+
+         assert len(attn_layers) == len(attn_intermeds)

-             layer.qk_clip_(attn_inter, tau = tau)
+         for attn_layer, attn_inter in zip(attn_layers, attn_intermeds):
+             attn_layer.qk_clip_(attn_inter, tau = tau)

      def forward(
          self,
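The refactor above replaces the skip-in-loop pattern with an explicit pairing: the attention ('a') and cross-attention ('c') layers are collected first, their count is asserted against the recorded attention intermediates, and only then are layers and intermediates zipped for the per-layer QK clip. Below is a minimal, self-contained sketch of the underlying QK-Clip idea (proposed by the Kimi / Moonshot AI team as part of MuonClip): after a training step, any attention layer whose maximum pre-softmax logit exceeded a threshold tau has its query and key projection weights scaled down by sqrt(tau / max_logit). The SimpleAttention module, the max_logits bookkeeping, and the single-head scaling rule here are illustrative assumptions, not the library's actual Attention.qk_clip_ implementation, which operates per head on stored intermediates.

import torch
from torch import nn

class SimpleAttention(nn.Module):
    # minimal single-head attention standing in for an x-transformers attention block
    def __init__(self, dim):
        super().__init__()
        self.scale = dim ** -0.5
        self.to_q = nn.Linear(dim, dim, bias = False)
        self.to_k = nn.Linear(dim, dim, bias = False)
        self.to_v = nn.Linear(dim, dim, bias = False)

    def forward(self, x):
        q, k, v = self.to_q(x), self.to_k(x), self.to_v(x)
        sim = (q @ k.transpose(-2, -1)) * self.scale   # pre-softmax attention logits
        max_logit = sim.amax().detach()                # recorded as an "intermediate" for clipping
        attn = sim.softmax(dim = -1)
        return attn @ v, max_logit

    @torch.no_grad()
    def qk_clip_(self, max_logit, tau = 100.):
        # if the largest logit exceeded tau, scale W_q and W_k by sqrt(tau / max_logit)
        # so the q·k product (and hence the logit) is pulled back toward tau
        if max_logit <= tau:
            return
        gamma = (tau / max_logit) ** 0.5
        self.to_q.weight.mul_(gamma)
        self.to_k.weight.mul_(gamma)

# usage mirroring the refactored pairing: collect per-layer intermediates during the
# forward pass, assert the counts line up, then zip layers with their intermediates
layers = nn.ModuleList([SimpleAttention(64) for _ in range(4)])
x = torch.randn(2, 16, 64)

max_logits = []
for layer in layers:
    x, max_logit = layer(x)
    max_logits.append(max_logit)

# ... loss computation, backward pass and optimizer step would happen here ...

assert len(layers) == len(max_logits)

for layer, max_logit in zip(layers, max_logits):
    layer.qk_clip_(max_logit, tau = 100.)

The assert in the sketch mirrors the one added in 2.7.6; it presumably guards against a silent misalignment that the old skip-and-continue loop could hide if the number of recorded intermediates did not match the number of attention and cross-attention layers.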