x-transformers 2.11.10.tar.gz → 2.11.12.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (68)
  1. {x_transformers-2.11.10 → x_transformers-2.11.12}/PKG-INFO +1 -1
  2. {x_transformers-2.11.10 → x_transformers-2.11.12}/pyproject.toml +1 -1
  3. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/x_transformers.py +5 -1
  4. {x_transformers-2.11.10 → x_transformers-2.11.12}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.11.10 → x_transformers-2.11.12}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.11.10 → x_transformers-2.11.12}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.11.10 → x_transformers-2.11.12}/.gitignore +0 -0
  8. {x_transformers-2.11.10 → x_transformers-2.11.12}/LICENSE +0 -0
  9. {x_transformers-2.11.10 → x_transformers-2.11.12}/README.md +0 -0
  10. {x_transformers-2.11.10 → x_transformers-2.11.12}/data/README.md +0 -0
  11. {x_transformers-2.11.10 → x_transformers-2.11.12}/data/enwik8.gz +0 -0
  12. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/all-attention.png +0 -0
  13. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/deepnorm.png +0 -0
  16. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/fcm.png +0 -0
  22. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/ffglu.png +0 -0
  23. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/flash-attention.png +0 -0
  24. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/gate_values.png +0 -0
  25. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/gating.png +0 -0
  26. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/macaron-1.png +0 -0
  28. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/macaron-2.png +0 -0
  29. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/normformer.png +0 -0
  31. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/pia.png +0 -0
  32. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/resi_dual.png +0 -0
  34. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/residual_attn.png +0 -0
  35. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/rezero.png +0 -0
  36. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/rotary.png +0 -0
  37. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/sandwich.png +0 -0
  39. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/scalenorm.png +0 -0
  41. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/talking-heads.png +0 -0
  42. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/topk-attention.png +0 -0
  43. {x_transformers-2.11.10 → x_transformers-2.11.12}/images/xval.png +0 -0
  44. {x_transformers-2.11.10 → x_transformers-2.11.12}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_belief_state.py +0 -0
  46. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_copy.py +0 -0
  47. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_enwik8.py +0 -0
  49. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_free.py +0 -0
  50. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_gpt_vae.py +0 -0
  51. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_length_extrapolate.py +0 -0
  52. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_parity.py +0 -0
  53. {x_transformers-2.11.10 → x_transformers-2.11.12}/train_with_muon.py +0 -0
  54. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/__init__.py +0 -0
  55. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/attend.py +0 -0
  56. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/autoregressive_wrapper.py +0 -0
  57. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/belief_state_wrapper.py +0 -0
  58. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/continuous.py +0 -0
  59. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/dpo.py +0 -0
  60. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/entropy_based_tokenizer.py +0 -0
  61. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/free_transformer.py +0 -0
  62. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/gpt_vae.py +0 -0
  63. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/multi_input.py +0 -0
  64. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/neo_mlp.py +0 -0
  65. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/nonautoregressive_wrapper.py +0 -0
  66. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/up_wrapper.py +0 -0
  67. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  68. {x_transformers-2.11.10 → x_transformers-2.11.12}/x_transformers/xval.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.11.10
+Version: 2.11.12
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.11.10"
+version = "2.11.12"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
x_transformers/x_transformers.py
@@ -740,11 +740,14 @@ def apply_rotary_pos_emb(t, freqs, scale = 1):
     rot_dim, seq_len, orig_dtype = freqs.shape[-1], t.shape[-2], t.dtype
 
     freqs = freqs[:, -seq_len:, :]
-    scale = scale[:, -seq_len:, :] if isinstance(scale, torch.Tensor) else scale
+    scale = scale[:, -seq_len:, :] if is_tensor(scale) else scale
 
     if t.ndim == 4 and freqs.ndim == 3:
         freqs = rearrange(freqs, 'b n d -> b 1 n d')
 
+    if is_tensor(scale):
+        scale = rearrange(scale, 'b n d -> b 1 n d')
+
     # partial rotary embeddings, Wang et al. GPT-J
     t, t_unrotated = t[..., :rot_dim], t[..., rot_dim:]
     t = (t * freqs.cos() * scale) + (rotate_half(t) * freqs.sin() * scale)
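In `apply_rotary_pos_emb`, the tensor check switches from `isinstance(scale, torch.Tensor)` to `is_tensor`, and a tensor-valued `scale` now receives the same singleton heads dimension that `freqs` already gets, so it broadcasts against 4-dimensional queries/keys. Below is a minimal, self-contained sketch of the shape issue; `apply_rotary_sketch` and the simplified `rotate_half` are illustrative stand-ins, not the library's internal code.

import torch
from einops import rearrange

def rotate_half(x):
    # standard RoPE helper: split the last dim in half and rotate
    x1, x2 = x.chunk(2, dim = -1)
    return torch.cat((-x2, x1), dim = -1)

def apply_rotary_sketch(t, freqs, scale = 1.):
    # t: (b, h, n, d) queries/keys, freqs: (b, n, d), scale: (b, n, d) tensor or scalar
    seq_len = t.shape[-2]
    freqs = freqs[:, -seq_len:, :]
    scale = scale[:, -seq_len:, :] if torch.is_tensor(scale) else scale

    if t.ndim == 4 and freqs.ndim == 3:
        freqs = rearrange(freqs, 'b n d -> b 1 n d')

    if torch.is_tensor(scale):
        # without this, a (b, n, d) scale fails to broadcast against (b, h, n, d)
        scale = rearrange(scale, 'b n d -> b 1 n d')

    return (t * freqs.cos() * scale) + (rotate_half(t) * freqs.sin() * scale)

q     = torch.randn(2, 8, 16, 64)   # (batch, heads, seq, head dim)
freqs = torch.randn(2, 16, 64)      # per-position rotary angles
scale = torch.rand(2, 16, 64)       # xpos-style per-position scale
print(apply_rotary_sketch(q, freqs, scale).shape)   # torch.Size([2, 8, 16, 64])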
@@ -3438,6 +3441,7 @@ class TransformerWrapper(Module):
 
         kwargs = dict(
             **kwargs,
+            pos = pos,
             seq_pos_offset = seq_pos_offset,
             seq_start_pos = seq_start_pos,
             input_not_include_cache = input_not_include_cache
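The `TransformerWrapper` hunk threads `pos` into the regrouped keyword arguments alongside `seq_pos_offset` and `seq_start_pos`, so explicitly supplied position indices travel with the rest of the sequence bookkeeping. A hedged usage sketch follows, assuming the `TransformerWrapper`/`Decoder` constructor shown in the project README and assuming `forward` accepts a `pos` tensor of position ids (check the signature in `x_transformers.py`).

import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 256,
    max_seq_len = 1024,
    attn_layers = Decoder(dim = 512, depth = 6, heads = 8)
)

tokens = torch.randint(0, 256, (2, 128))
pos = torch.arange(128).unsqueeze(0).expand(2, -1)   # explicit (batch, seq) position ids

logits = model(tokens, pos = pos)   # per this diff, pos is forwarded with the other regrouped kwargs
print(logits.shape)                 # torch.Size([2, 128, 256])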