x-transformers 2.5.1.tar.gz → 2.5.3.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {x_transformers-2.5.1 → x_transformers-2.5.3}/PKG-INFO +1 -1
  2. {x_transformers-2.5.1 → x_transformers-2.5.3}/pyproject.toml +1 -1
  3. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/x_transformers.py +9 -1
  4. {x_transformers-2.5.1 → x_transformers-2.5.3}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.5.1 → x_transformers-2.5.3}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.5.1 → x_transformers-2.5.3}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.5.1 → x_transformers-2.5.3}/.gitignore +0 -0
  8. {x_transformers-2.5.1 → x_transformers-2.5.3}/LICENSE +0 -0
  9. {x_transformers-2.5.1 → x_transformers-2.5.3}/README.md +0 -0
  10. {x_transformers-2.5.1 → x_transformers-2.5.3}/data/README.md +0 -0
  11. {x_transformers-2.5.1 → x_transformers-2.5.3}/data/enwik8.gz +0 -0
  12. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/all-attention.png +0 -0
  13. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/deepnorm.png +0 -0
  16. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/fcm.png +0 -0
  22. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/ffglu.png +0 -0
  23. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/flash-attention.png +0 -0
  24. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/gate_values.png +0 -0
  25. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/gating.png +0 -0
  26. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/macaron-1.png +0 -0
  28. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/macaron-2.png +0 -0
  29. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/normformer.png +0 -0
  31. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/pia.png +0 -0
  32. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/resi_dual.png +0 -0
  34. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/residual_attn.png +0 -0
  35. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/rezero.png +0 -0
  36. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/rotary.png +0 -0
  37. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/sandwich.png +0 -0
  39. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/scalenorm.png +0 -0
  41. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/talking-heads.png +0 -0
  42. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/topk-attention.png +0 -0
  43. {x_transformers-2.5.1 → x_transformers-2.5.3}/images/xval.png +0 -0
  44. {x_transformers-2.5.1 → x_transformers-2.5.3}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.5.1 → x_transformers-2.5.3}/train_belief_state.py +0 -0
  46. {x_transformers-2.5.1 → x_transformers-2.5.3}/train_copy.py +0 -0
  47. {x_transformers-2.5.1 → x_transformers-2.5.3}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.5.1 → x_transformers-2.5.3}/train_enwik8.py +0 -0
  49. {x_transformers-2.5.1 → x_transformers-2.5.3}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.5.1 → x_transformers-2.5.3}/train_parity.py +0 -0
  51. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/continuous.py +0 -0
  56. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/entropy_based_tokenizer.py +0 -0
  58. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/multi_input.py +0 -0
  59. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/neo_mlp.py +0 -0
  60. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/nonautoregressive_wrapper.py +0 -0
  61. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/up_wrapper.py +0 -0
  62. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  63. {x_transformers-2.5.1 → x_transformers-2.5.3}/x_transformers/xval.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.5.1
+Version: 2.5.3
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.5.1"
+version = "2.5.3"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
x_transformers/x_transformers.py
@@ -2763,6 +2763,7 @@ class AttentionPool(Module):
         depth = 1,
         heads = 8,
         dim_head = 64,
+        use_transformer_blocks = None,
         squeeze_output = None,
         attn_kwargs: dict = dict()
     ):
@@ -2772,9 +2773,12 @@ class AttentionPool(Module):
         squeeze_output = default(squeeze_output, False)
         assert not (squeeze_output and num_pooled_tokens > 1)

+        use_transformer_blocks = default(use_transformer_blocks, depth > 1)
+        assert use_transformer_blocks or depth == 1
+
         self.queries = nn.Parameter(torch.randn(num_pooled_tokens, dim) * 1e-2)

-        if depth > 1:
+        if use_transformer_blocks:
             assert not add_residual, 'residual already in effect when doing a full cross attention based transformer for pooling'
             attn_kwargs = {f'attn_{k}': v for k, v in attn_kwargs.items()}

@@ -2783,6 +2787,7 @@ class AttentionPool(Module):
             self.pooler = Attention(dim = dim, dim_context = dim_context, heads = heads, dim_head = dim_head, **attn_kwargs)

         self.add_residual = add_residual
+        self.squeeze_output = squeeze_output

     def forward(self, context, mask = None):
         batch = context.shape[0]
@@ -2794,6 +2799,9 @@ class AttentionPool(Module):
         if self.add_residual:
             pooled = pooled + queries

+        if self.squeeze_output:
+            pooled = rearrange(pooled, 'b 1 d -> b d')
+
         return pooled

 class ViTransformerWrapper(Module):
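For reference, the only code change in this release is to AttentionPool: a new use_transformer_blocks flag (falling back to depth > 1 when left as None) decides whether pooling goes through full transformer blocks or a single cross-attention, and squeeze_output is now stored on the module and applied in forward(). The sketch below is one plausible way to exercise the new arguments; the import path, the dim and num_pooled_tokens keyword names, and the tensor shapes are inferred from this diff rather than confirmed against the full source.

# Hedged usage sketch for the AttentionPool changes in 2.5.3.
# Assumptions: AttentionPool is importable from x_transformers.x_transformers,
# and the constructor accepts the dim / num_pooled_tokens keywords implied
# by the diff body above.
import torch
from x_transformers.x_transformers import AttentionPool

pool = AttentionPool(
    dim = 512,
    num_pooled_tokens = 1,           # must stay at 1 when squeeze_output is True
    depth = 1,
    use_transformer_blocks = False,  # new flag; None falls back to depth > 1
    squeeze_output = True,           # now stored and applied in forward()
)

context = torch.randn(2, 128, 512)   # (batch, sequence, dim)
pooled = pool(context)               # squeezed from (2, 1, 512) to (2, 512)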