x-transformers 2.1.27__tar.gz → 2.1.28__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (60)
  1. {x_transformers-2.1.27 → x_transformers-2.1.28}/PKG-INFO +1 -1
  2. {x_transformers-2.1.27 → x_transformers-2.1.28}/pyproject.toml +1 -1
  3. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/belief_state_wrapper.py +4 -2
  4. {x_transformers-2.1.27 → x_transformers-2.1.28}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.1.27 → x_transformers-2.1.28}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.1.27 → x_transformers-2.1.28}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.1.27 → x_transformers-2.1.28}/.gitignore +0 -0
  8. {x_transformers-2.1.27 → x_transformers-2.1.28}/LICENSE +0 -0
  9. {x_transformers-2.1.27 → x_transformers-2.1.28}/README.md +0 -0
  10. {x_transformers-2.1.27 → x_transformers-2.1.28}/data/README.md +0 -0
  11. {x_transformers-2.1.27 → x_transformers-2.1.28}/data/enwik8.gz +0 -0
  12. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/all-attention.png +0 -0
  13. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/deepnorm.png +0 -0
  16. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/fcm.png +0 -0
  22. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/ffglu.png +0 -0
  23. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/flash-attention.png +0 -0
  24. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/gate_values.png +0 -0
  25. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/gating.png +0 -0
  26. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/macaron-1.png +0 -0
  28. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/macaron-2.png +0 -0
  29. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/normformer.png +0 -0
  31. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/pia.png +0 -0
  32. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/resi_dual.png +0 -0
  34. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/residual_attn.png +0 -0
  35. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/rezero.png +0 -0
  36. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/rotary.png +0 -0
  37. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/sandwich.png +0 -0
  39. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/scalenorm.png +0 -0
  41. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/talking-heads.png +0 -0
  42. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/topk-attention.png +0 -0
  43. {x_transformers-2.1.27 → x_transformers-2.1.28}/images/xval.png +0 -0
  44. {x_transformers-2.1.27 → x_transformers-2.1.28}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.1.27 → x_transformers-2.1.28}/train_belief_state.py +0 -0
  46. {x_transformers-2.1.27 → x_transformers-2.1.28}/train_copy.py +0 -0
  47. {x_transformers-2.1.27 → x_transformers-2.1.28}/train_enwik8.py +0 -0
  48. {x_transformers-2.1.27 → x_transformers-2.1.28}/train_length_extrapolate.py +0 -0
  49. {x_transformers-2.1.27 → x_transformers-2.1.28}/train_parity.py +0 -0
  50. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/__init__.py +0 -0
  51. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/attend.py +0 -0
  52. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/autoregressive_wrapper.py +0 -0
  53. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/continuous.py +0 -0
  54. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/dpo.py +0 -0
  55. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/multi_input.py +0 -0
  56. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/neo_mlp.py +0 -0
  57. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/nonautoregressive_wrapper.py +0 -0
  58. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/x_transformers.py +0 -0
  59. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  60. {x_transformers-2.1.27 → x_transformers-2.1.28}/x_transformers/xval.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: x-transformers
3
- Version: 2.1.27
3
+ Version: 2.1.28
4
4
  Summary: X-Transformers
5
5
  Project-URL: Homepage, https://pypi.org/project/x-transformers/
6
6
  Project-URL: Repository, https://github.com/lucidrains/x-transformers
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "x-transformers"
3
- version = "2.1.27"
3
+ version = "2.1.28"
4
4
  description = "X-Transformers"
5
5
  authors = [
6
6
  { name = "Phil Wang", email = "lucidrains@gmail.com" }
@@ -260,6 +260,7 @@ class BeliefStateWrapper(Module):
260
260
 
261
261
  if exists(lens):
262
262
  mask = einx.less('j, i -> i j', arange(seq_len, device = device), lens)
263
+ seq_for_labels = torch.where(mask, seq, -1)
263
264
 
264
265
  # forward autoregressive
265
266
 
@@ -319,7 +320,7 @@ class BeliefStateWrapper(Module):
319
320
 
320
321
  labels_fi, labels_bi = (fi + 1), (bi - 1)
321
322
 
322
- forward_labels, backward_labels = seq[:, labels_fi], seq[:, labels_bi]
323
+ forward_labels, backward_labels = seq_for_labels[:, labels_fi], seq_for_labels[:, labels_bi]
323
324
 
324
325
  labels = cat((forward_labels, backward_labels), dim = -1)
325
326
 
@@ -337,7 +338,8 @@ class BeliefStateWrapper(Module):
337
338
  loss = F.cross_entropy(
338
339
  rearrange(logits, 'b n (fb l) -> b l (fb n)', fb = 2),
339
340
  labels,
340
- reduction = 'none' if self.needs_loss_weight else 'mean'
341
+ reduction = 'none' if self.needs_loss_weight else 'mean',
342
+ ignore_index = -1
341
343
  )
342
344
 
343
345
  # maybe predict terminal
File without changes