x-transformers 2.3.23.tar.gz → 2.3.25.tar.gz

This diff shows the changes between two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.
Files changed (62)
  1. {x_transformers-2.3.23 → x_transformers-2.3.25}/PKG-INFO +1 -1
  2. {x_transformers-2.3.23 → x_transformers-2.3.25}/pyproject.toml +1 -1
  3. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/autoregressive_wrapper.py +2 -2
  4. {x_transformers-2.3.23 → x_transformers-2.3.25}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.3.23 → x_transformers-2.3.25}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.3.23 → x_transformers-2.3.25}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.3.23 → x_transformers-2.3.25}/.gitignore +0 -0
  8. {x_transformers-2.3.23 → x_transformers-2.3.25}/LICENSE +0 -0
  9. {x_transformers-2.3.23 → x_transformers-2.3.25}/README.md +0 -0
  10. {x_transformers-2.3.23 → x_transformers-2.3.25}/data/README.md +0 -0
  11. {x_transformers-2.3.23 → x_transformers-2.3.25}/data/enwik8.gz +0 -0
  12. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/all-attention.png +0 -0
  13. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/deepnorm.png +0 -0
  16. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/fcm.png +0 -0
  22. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/ffglu.png +0 -0
  23. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/flash-attention.png +0 -0
  24. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/gate_values.png +0 -0
  25. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/gating.png +0 -0
  26. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/macaron-1.png +0 -0
  28. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/macaron-2.png +0 -0
  29. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/normformer.png +0 -0
  31. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/pia.png +0 -0
  32. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/resi_dual.png +0 -0
  34. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/residual_attn.png +0 -0
  35. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/rezero.png +0 -0
  36. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/rotary.png +0 -0
  37. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/sandwich.png +0 -0
  39. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/scalenorm.png +0 -0
  41. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/talking-heads.png +0 -0
  42. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/topk-attention.png +0 -0
  43. {x_transformers-2.3.23 → x_transformers-2.3.25}/images/xval.png +0 -0
  44. {x_transformers-2.3.23 → x_transformers-2.3.25}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.3.23 → x_transformers-2.3.25}/train_belief_state.py +0 -0
  46. {x_transformers-2.3.23 → x_transformers-2.3.25}/train_copy.py +0 -0
  47. {x_transformers-2.3.23 → x_transformers-2.3.25}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.3.23 → x_transformers-2.3.25}/train_enwik8.py +0 -0
  49. {x_transformers-2.3.23 → x_transformers-2.3.25}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.3.23 → x_transformers-2.3.25}/train_parity.py +0 -0
  51. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/belief_state_wrapper.py +0 -0
  54. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/continuous.py +0 -0
  55. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/dpo.py +0 -0
  56. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/entropy_based_tokenizer.py +0 -0
  57. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/multi_input.py +0 -0
  58. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/neo_mlp.py +0 -0
  59. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/nonautoregressive_wrapper.py +0 -0
  60. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/x_transformers.py +0 -0
  61. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  62. {x_transformers-2.3.23 → x_transformers-2.3.25}/x_transformers/xval.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.3.23
+Version: 2.3.25
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.3.23"
+version = "2.3.25"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
x_transformers/autoregressive_wrapper.py
@@ -324,7 +324,7 @@ class AutoregressiveWrapper(Module):
         kwargs.update(self_attn_kv_mask = mask)
 
         out, cache = self.net(
-            x,
+            inp,
             return_intermediates = True,
             return_attn_z_loss = add_attn_z_loss,
             return_next_embed_pred = add_next_embed_loss,
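For context: AutoregressiveWrapper.forward splits its input sequence into a teacher-forcing input and a one-token-shifted target before calling the network, so the net should be fed inp rather than the full x. A minimal sketch of that split, assuming the conventional inp, target = x[:, :-1], x[:, 1:] pattern (toy shapes, not the wrapper's exact code):

import torch

# toy batch of token ids, shape (batch, seq_len)
x = torch.randint(0, 256, (2, 8))

# teacher-forcing split: the model reads inp and is scored against target
inp, target = x[:, :-1], x[:, 1:]

# calling the net on inp (seq_len - 1 tokens) keeps its predictions
# aligned one-to-one with target; calling it on the full x would not
assert inp.shape[1] == target.shape[1] == x.shape[1] - 1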
@@ -356,7 +356,7 @@ class AutoregressiveWrapper(Module):
             loss = loss + cache.attn_z_loss
 
         if add_next_embed_loss:
-            mask = inp[:, :-1] != ignore_index
+            mask = target != ignore_index
             embed_pred = next_embed_pred[:, :-1]
             cont_targets = init_embeds[:, 1:].detach()
 
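The second hunk derives the padding mask for the auxiliary next-embed loss from target rather than from a doubly shifted slice of inp, keeping the mask consistent with the positions being supervised. A hedged sketch of the general pattern of masking ignore_index positions out of a per-position loss (shapes and names are illustrative, not the wrapper's exact internals):

import torch
import torch.nn.functional as F

ignore_index = -100

# illustrative tensors: continuous predictions vs. detached embedding targets
embed_pred   = torch.randn(2, 7, 512)          # (batch, seq, dim)
cont_targets = torch.randn(2, 7, 512)
target       = torch.randint(0, 256, (2, 7))   # token-level targets
target[:, -2:] = ignore_index                  # pretend trailing padding

mask = target != ignore_index                  # (batch, seq) bool

# per-position MSE, zeroing out ignored positions before averaging
per_pos = F.mse_loss(embed_pred, cont_targets, reduction = 'none').mean(dim = -1)
loss = (per_pos * mask).sum() / mask.sum().clamp(min = 1)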