x-transformers 2.3.6__tar.gz → 2.3.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (62)
  1. {x_transformers-2.3.6 → x_transformers-2.3.8}/PKG-INFO +1 -1
  2. {x_transformers-2.3.6 → x_transformers-2.3.8}/pyproject.toml +1 -1
  3. {x_transformers-2.3.6 → x_transformers-2.3.8}/tests/test_x_transformers.py +1 -1
  4. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/x_transformers.py +4 -4
  5. {x_transformers-2.3.6 → x_transformers-2.3.8}/.github/FUNDING.yml +0 -0
  6. {x_transformers-2.3.6 → x_transformers-2.3.8}/.github/workflows/python-publish.yml +0 -0
  7. {x_transformers-2.3.6 → x_transformers-2.3.8}/.github/workflows/python-test.yaml +0 -0
  8. {x_transformers-2.3.6 → x_transformers-2.3.8}/.gitignore +0 -0
  9. {x_transformers-2.3.6 → x_transformers-2.3.8}/LICENSE +0 -0
  10. {x_transformers-2.3.6 → x_transformers-2.3.8}/README.md +0 -0
  11. {x_transformers-2.3.6 → x_transformers-2.3.8}/data/README.md +0 -0
  12. {x_transformers-2.3.6 → x_transformers-2.3.8}/data/enwik8.gz +0 -0
  13. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/all-attention.png +0 -0
  14. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/attention-on-attention.png +0 -0
  15. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/cosine-sim-attention.png +0 -0
  16. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/deepnorm.png +0 -0
  17. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/dynamic-pos-bias-linear.png +0 -0
  18. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/dynamic-pos-bias-log.png +0 -0
  19. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  20. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/dynamic-pos-bias.png +0 -0
  21. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/enhanced-recurrence.png +0 -0
  22. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/fcm.png +0 -0
  23. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/ffglu.png +0 -0
  24. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/flash-attention.png +0 -0
  25. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/gate_values.png +0 -0
  26. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/gating.png +0 -0
  27. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/length-extrapolation-scale.png +0 -0
  28. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/macaron-1.png +0 -0
  29. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/macaron-2.png +0 -0
  30. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/memory-transformer.png +0 -0
  31. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/normformer.png +0 -0
  32. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/pia.png +0 -0
  33. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/qknorm-analysis.png +0 -0
  34. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/resi_dual.png +0 -0
  35. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/residual_attn.png +0 -0
  36. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/rezero.png +0 -0
  37. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/rotary.png +0 -0
  38. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/sandwich-2.png +0 -0
  39. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/sandwich.png +0 -0
  40. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/sandwich_norm.png +0 -0
  41. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/scalenorm.png +0 -0
  42. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/talking-heads.png +0 -0
  43. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/topk-attention.png +0 -0
  44. {x_transformers-2.3.6 → x_transformers-2.3.8}/images/xval.png +0 -0
  45. {x_transformers-2.3.6 → x_transformers-2.3.8}/train_belief_state.py +0 -0
  46. {x_transformers-2.3.6 → x_transformers-2.3.8}/train_copy.py +0 -0
  47. {x_transformers-2.3.6 → x_transformers-2.3.8}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.3.6 → x_transformers-2.3.8}/train_enwik8.py +0 -0
  49. {x_transformers-2.3.6 → x_transformers-2.3.8}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.3.6 → x_transformers-2.3.8}/train_parity.py +0 -0
  51. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/continuous.py +0 -0
  56. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/dpo.py +0 -0
  57. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/entropy_based_tokenizer.py +0 -0
  58. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/multi_input.py +0 -0
  59. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/neo_mlp.py +0 -0
  60. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/nonautoregressive_wrapper.py +0 -0
  61. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  62. {x_transformers-2.3.6 → x_transformers-2.3.8}/x_transformers/xval.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.3.6
+Version: 2.3.8
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.3.6"
+version = "2.3.8"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
tests/test_x_transformers.py
@@ -858,7 +858,7 @@ def test_ff_deep_embed():
         depth = 6,
         heads = 8,
         rotary_pos_emb = True,
-        ff_deep_embed_hiddens = True,
+        ff_deep_embed = True,
         ff_deep_embed_num_tokens = 20000,
     )
 )
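
For context, a minimal usage sketch of the renamed flag, mirroring the updated test above. The TransformerWrapper/Decoder construction, dimensions, and sequence length are illustrative assumptions, not taken from this diff; only the ff_deep_embed and ff_deep_embed_num_tokens settings come from the test.

import torch
from x_transformers import TransformerWrapper, Decoder

model = TransformerWrapper(
    num_tokens = 20000,
    max_seq_len = 1024,
    attn_layers = Decoder(
        dim = 512,
        depth = 6,
        heads = 8,
        rotary_pos_emb = True,
        ff_deep_embed = True,               # renamed from ff_deep_embed_hiddens in 2.3.6
        ff_deep_embed_num_tokens = 20000,
    )
)

tokens = torch.randint(0, 20000, (1, 256))
logits = model(tokens)   # (1, 256, 20000)
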
x_transformers/x_transformers.py
@@ -1208,7 +1208,7 @@ class FeedForward(Module):
         sublayer_dropout = 0.,
         no_bias = False,
         zero_init_output = False,
-        deep_embed_hiddens = False,
+        deep_embed = False,
         deep_embed_num_tokens = None,
     ):
         super().__init__()
@@ -1249,9 +1249,9 @@ class FeedForward(Module):
         # improvements were clearest to me (on my toy setup) with multiplying on output of feedforward, will try with attention at future date

         self.deep_embed = None
-        if deep_embed_hiddens:
+        if deep_embed:
             assert exists(deep_embed_num_tokens)
-            self.deep_embed = nn.Parameter(torch.zeros(deep_embed_num_tokens, dim_out))
+            self.deep_embed = nn.Parameter(torch.ones(deep_embed_num_tokens, dim_out))

         # init last linear layer to 0

@@ -1266,7 +1266,7 @@ class FeedForward(Module):
         out = self.ff(x)

         if exists(deep_embed_ids) and exists(self.deep_embed):
-            deep_embed = self.deep_embed[deep_embed_ids] + 1.
+            deep_embed = self.deep_embed[deep_embed_ids]
             out = out * deep_embed

         return out
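
The last two hunks are two halves of one behavior-preserving change: the per-token multiplier was previously stored as a zero-initialized offset and shifted by + 1. at lookup time; it is now stored directly as a ones-initialized scale, so the lookup no longer needs the shift. A standalone sketch of the mechanism in plain PyTorch (class and argument names here are hypothetical, not the library's internals):

import torch
from torch import nn

class DeepEmbedFeedForward(nn.Module):
    # toy feedforward whose output is scaled by a learned per-token multiplier
    def __init__(self, dim, num_tokens):
        super().__init__()
        self.ff = nn.Sequential(nn.Linear(dim, dim * 4), nn.GELU(), nn.Linear(dim * 4, dim))
        # 2.3.8 style: start from an identity (all-ones) scale
        self.deep_embed = nn.Parameter(torch.ones(num_tokens, dim))

    def forward(self, x, token_ids):
        out = self.ff(x)
        # 2.3.6 stored zeros and added 1. here; ones-init yields the same multiplier of 1 at initialization
        return out * self.deep_embed[token_ids]

ff = DeepEmbedFeedForward(dim = 16, num_tokens = 100)
x = torch.randn(2, 8, 16)
ids = torch.randint(0, 100, (2, 8))
out = ff(x, ids)   # (2, 8, 16)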