x-transformers 2.11.1.tar.gz → 2.11.2.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


Files changed (68)
  1. {x_transformers-2.11.1 → x_transformers-2.11.2}/PKG-INFO +1 -1
  2. {x_transformers-2.11.1 → x_transformers-2.11.2}/pyproject.toml +1 -1
  3. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/gpt_vae.py +8 -2
  4. {x_transformers-2.11.1 → x_transformers-2.11.2}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.11.1 → x_transformers-2.11.2}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.11.1 → x_transformers-2.11.2}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.11.1 → x_transformers-2.11.2}/.gitignore +0 -0
  8. {x_transformers-2.11.1 → x_transformers-2.11.2}/LICENSE +0 -0
  9. {x_transformers-2.11.1 → x_transformers-2.11.2}/README.md +0 -0
  10. {x_transformers-2.11.1 → x_transformers-2.11.2}/data/README.md +0 -0
  11. {x_transformers-2.11.1 → x_transformers-2.11.2}/data/enwik8.gz +0 -0
  12. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/all-attention.png +0 -0
  13. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/deepnorm.png +0 -0
  16. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/fcm.png +0 -0
  22. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/ffglu.png +0 -0
  23. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/flash-attention.png +0 -0
  24. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/gate_values.png +0 -0
  25. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/gating.png +0 -0
  26. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/macaron-1.png +0 -0
  28. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/macaron-2.png +0 -0
  29. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/normformer.png +0 -0
  31. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/pia.png +0 -0
  32. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/resi_dual.png +0 -0
  34. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/residual_attn.png +0 -0
  35. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/rezero.png +0 -0
  36. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/rotary.png +0 -0
  37. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/sandwich.png +0 -0
  39. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/scalenorm.png +0 -0
  41. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/talking-heads.png +0 -0
  42. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/topk-attention.png +0 -0
  43. {x_transformers-2.11.1 → x_transformers-2.11.2}/images/xval.png +0 -0
  44. {x_transformers-2.11.1 → x_transformers-2.11.2}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_belief_state.py +0 -0
  46. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_copy.py +0 -0
  47. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_enwik8.py +0 -0
  49. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_free.py +0 -0
  50. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_gpt_vae.py +0 -0
  51. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_length_extrapolate.py +0 -0
  52. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_parity.py +0 -0
  53. {x_transformers-2.11.1 → x_transformers-2.11.2}/train_with_muon.py +0 -0
  54. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/__init__.py +0 -0
  55. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/attend.py +0 -0
  56. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/autoregressive_wrapper.py +0 -0
  57. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/belief_state_wrapper.py +0 -0
  58. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/continuous.py +0 -0
  59. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/dpo.py +0 -0
  60. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/entropy_based_tokenizer.py +0 -0
  61. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/free_transformer.py +0 -0
  62. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/multi_input.py +0 -0
  63. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/neo_mlp.py +0 -0
  64. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/nonautoregressive_wrapper.py +0 -0
  65. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/up_wrapper.py +0 -0
  66. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/x_transformers.py +0 -0
  67. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  68. {x_transformers-2.11.1 → x_transformers-2.11.2}/x_transformers/xval.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.11.1
+Version: 2.11.2
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.11.1"
+version = "2.11.2"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
x_transformers/gpt_vae.py
@@ -44,6 +44,7 @@ class GPTVAE(Module):
         enc_kwargs: dict = dict(),
         dec_kwargs: dict = dict(),
         vae_kl_loss_weight = 1.,
+        vae_kl_div_floor = 0.,       # what was done in free transformer, which in turn came from Kingma 2016
         latents_dropout_prob = 0.5,  # what percentage of the time to dropout the latents completely
         pad_id = -1,
         encoder: Module | None = None,
@@ -99,6 +100,7 @@ class GPTVAE(Module):
 
         # loss weights - vae kl loss
 
+        self.vae_kl_div_floor = vae_kl_div_floor
         self.vae_kl_loss_weight = vae_kl_loss_weight
 
         self.latents_dropout = nn.Dropout(latents_dropout_prob)
@@ -190,12 +192,16 @@ class GPTVAE(Module):
 
         # vae kl loss
 
-        vae_kl_loss = (
+        vae_kl_loss = 0.5 * (
             latents_log_var.exp()
             + latents_mean.square()
             - latents_log_var
             - 1.
-        ).sum(dim = -1).mean()
+        )
+
+        vae_kl_loss = F.relu(vae_kl_loss - self.vae_kl_div_floor)
+
+        vae_kl_loss = vae_kl_loss.sum(dim = -1).mean()
 
         # return losses
 
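For readers skimming the gpt_vae.py hunks above: the release rewrites the closed-form KL between the encoder's diagonal Gaussian posterior and the unit-Gaussian prior to include the conventional 0.5 factor, and clamps the per-dimension KL at a configurable floor (the "free bits" idea from Kingma et al. 2016, also used in the Free Transformer). Below is a minimal standalone sketch in plain PyTorch, not the package's own API; the function name and tensor shapes are illustrative assumptions.

import torch
import torch.nn.functional as F

def kl_loss_with_floor(latents_mean, latents_log_var, kl_div_floor = 0.):
    # closed-form KL( N(mean, exp(log_var)) || N(0, 1) ), computed per latent dimension
    kl = 0.5 * (latents_log_var.exp() + latents_mean.square() - latents_log_var - 1.)

    # free-bits style floor: dimensions whose KL already sits below the floor
    # contribute zero loss (and zero gradient), which helps guard against posterior collapse
    kl = F.relu(kl - kl_div_floor)

    # sum over latent dimensions, average over the batch
    return kl.sum(dim = -1).mean()

# illustrative shapes: batch of 2, 64 latent dimensions
mean = torch.randn(2, 64)
log_var = torch.randn(2, 64)
loss = kl_loss_with_floor(mean, log_var, kl_div_floor = 0.1)

Since the per-dimension KL is always non-negative, the F.relu is a no-op at the new default of vae_kl_div_floor = 0., so the only behavioral change for existing users is the added 0.5 factor in the KL term.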