x-transformers 2.1.19__tar.gz → 2.1.20__tar.gz

This diff shows the changes between publicly released versions of the package as they appear in the public registry, and is provided for informational purposes only.
Files changed (60)
  1. {x_transformers-2.1.19 → x_transformers-2.1.20}/PKG-INFO +1 -1
  2. {x_transformers-2.1.19 → x_transformers-2.1.20}/pyproject.toml +1 -1
  3. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/belief_state_wrapper.py +3 -3
  4. {x_transformers-2.1.19 → x_transformers-2.1.20}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.1.19 → x_transformers-2.1.20}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.1.19 → x_transformers-2.1.20}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.1.19 → x_transformers-2.1.20}/.gitignore +0 -0
  8. {x_transformers-2.1.19 → x_transformers-2.1.20}/LICENSE +0 -0
  9. {x_transformers-2.1.19 → x_transformers-2.1.20}/README.md +0 -0
  10. {x_transformers-2.1.19 → x_transformers-2.1.20}/data/README.md +0 -0
  11. {x_transformers-2.1.19 → x_transformers-2.1.20}/data/enwik8.gz +0 -0
  12. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/all-attention.png +0 -0
  13. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/deepnorm.png +0 -0
  16. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/fcm.png +0 -0
  22. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/ffglu.png +0 -0
  23. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/flash-attention.png +0 -0
  24. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/gate_values.png +0 -0
  25. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/gating.png +0 -0
  26. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/macaron-1.png +0 -0
  28. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/macaron-2.png +0 -0
  29. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/normformer.png +0 -0
  31. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/pia.png +0 -0
  32. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/resi_dual.png +0 -0
  34. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/residual_attn.png +0 -0
  35. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/rezero.png +0 -0
  36. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/rotary.png +0 -0
  37. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/sandwich.png +0 -0
  39. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/scalenorm.png +0 -0
  41. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/talking-heads.png +0 -0
  42. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/topk-attention.png +0 -0
  43. {x_transformers-2.1.19 → x_transformers-2.1.20}/images/xval.png +0 -0
  44. {x_transformers-2.1.19 → x_transformers-2.1.20}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.1.19 → x_transformers-2.1.20}/train_belief_state.py +0 -0
  46. {x_transformers-2.1.19 → x_transformers-2.1.20}/train_copy.py +0 -0
  47. {x_transformers-2.1.19 → x_transformers-2.1.20}/train_enwik8.py +0 -0
  48. {x_transformers-2.1.19 → x_transformers-2.1.20}/train_length_extrapolate.py +0 -0
  49. {x_transformers-2.1.19 → x_transformers-2.1.20}/train_parity.py +0 -0
  50. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/__init__.py +0 -0
  51. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/attend.py +0 -0
  52. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/autoregressive_wrapper.py +0 -0
  53. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/continuous.py +0 -0
  54. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/dpo.py +0 -0
  55. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/multi_input.py +0 -0
  56. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/neo_mlp.py +0 -0
  57. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/nonautoregressive_wrapper.py +0 -0
  58. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/x_transformers.py +0 -0
  59. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  60. {x_transformers-2.1.19 → x_transformers-2.1.20}/x_transformers/xval.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.1.19
+Version: 2.1.20
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.1.19"
+version = "2.1.20"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
x_transformers/belief_state_wrapper.py
@@ -245,7 +245,7 @@ class BeliefStateWrapper(Module):
 
         seq_arange = arange(seq_len, device = device)
 
-        fb_pairs = cartesian_prod(seq_arange, seq_arange)
+        fb_pairs = cartesian_prod(seq_arange, seq_arange + 1) # plus one for suffix token
 
         # filter down to valid pairs, as in figure 11
         # f - forward, b - backward, i - indices
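For intuition, here is what the changed pair construction yields on a toy sequence length. This is a standalone sketch calling torch directly, not code from the package:

import torch

seq_len = 3
seq_arange = torch.arange(seq_len)                        # tensor([0, 1, 2])

# backward indices are offset by one, matching the suffix token the wrapper appends
fb_pairs = torch.cartesian_prod(seq_arange, seq_arange + 1)

print(fb_pairs.tolist())
# [[0, 1], [0, 2], [0, 3], [1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3]]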
@@ -271,7 +271,7 @@ class BeliefStateWrapper(Module):
 
         fi, bi = fb_pairs.unbind(dim = -1)
 
-        labels_fi, labels_bi = (fi + 1), bi
+        labels_fi, labels_bi = (fi + 1), (bi - 1)
 
         forward_labels, backward_labels = seq[:, labels_fi], seq[:, labels_bi]
 
@@ -281,7 +281,7 @@ class BeliefStateWrapper(Module):
 
         fb_embeds = cat((
             forward_embeds[:, fi],
-            backward_embeds[:, bi + 1] # needs plus one for automatically added suffix token
+            backward_embeds[:, bi]
 
         ), dim = -1)
 
         logits = self.text_head(fb_embeds)
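Taken together, the three changed lines move the suffix-token offset from the embedding lookup into the pair construction: the backward index bi now already carries the +1, so the backward label becomes bi - 1 and backward_embeds is indexed with bi directly. A minimal sketch of that bookkeeping, assuming for illustration that only pairs with fi < bi are kept (the wrapper applies its own validity filter, per the "figure 11" comment); this is not the package's code:

import torch

seq_len = 4
seq_arange = torch.arange(seq_len)

fb_pairs = torch.cartesian_prod(seq_arange, seq_arange + 1)
fi, bi = fb_pairs.unbind(dim = -1)

# illustrative filter only; the real wrapper applies its own mask
keep = fi < bi
fi, bi = fi[keep], bi[keep]

# forward head predicts the token after fi, backward head the token before bi;
# since bi already includes the suffix offset, bi - 1 lands on the original position
labels_fi, labels_bi = fi + 1, bi - 1

print(labels_fi.tolist())   # [1, 1, 1, 1, 2, 2, 2, 3, 3, 4]
print(labels_bi.tolist())   # [0, 1, 2, 3, 1, 2, 3, 2, 3, 3]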