x-transformers 2.7.0__tar.gz → 2.7.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (63)
  1. {x_transformers-2.7.0 → x_transformers-2.7.1}/PKG-INFO +1 -1
  2. {x_transformers-2.7.0 → x_transformers-2.7.1}/pyproject.toml +1 -1
  3. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/continuous.py +10 -1
  4. {x_transformers-2.7.0 → x_transformers-2.7.1}/.github/FUNDING.yml +0 -0
  5. {x_transformers-2.7.0 → x_transformers-2.7.1}/.github/workflows/python-publish.yml +0 -0
  6. {x_transformers-2.7.0 → x_transformers-2.7.1}/.github/workflows/python-test.yaml +0 -0
  7. {x_transformers-2.7.0 → x_transformers-2.7.1}/.gitignore +0 -0
  8. {x_transformers-2.7.0 → x_transformers-2.7.1}/LICENSE +0 -0
  9. {x_transformers-2.7.0 → x_transformers-2.7.1}/README.md +0 -0
  10. {x_transformers-2.7.0 → x_transformers-2.7.1}/data/README.md +0 -0
  11. {x_transformers-2.7.0 → x_transformers-2.7.1}/data/enwik8.gz +0 -0
  12. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/all-attention.png +0 -0
  13. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/attention-on-attention.png +0 -0
  14. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/cosine-sim-attention.png +0 -0
  15. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/deepnorm.png +0 -0
  16. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias-linear.png +0 -0
  17. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias-log.png +0 -0
  18. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias-sinusoidal.png +0 -0
  19. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/dynamic-pos-bias.png +0 -0
  20. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/enhanced-recurrence.png +0 -0
  21. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/fcm.png +0 -0
  22. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/ffglu.png +0 -0
  23. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/flash-attention.png +0 -0
  24. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/gate_values.png +0 -0
  25. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/gating.png +0 -0
  26. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/length-extrapolation-scale.png +0 -0
  27. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/macaron-1.png +0 -0
  28. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/macaron-2.png +0 -0
  29. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/memory-transformer.png +0 -0
  30. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/normformer.png +0 -0
  31. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/pia.png +0 -0
  32. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/qknorm-analysis.png +0 -0
  33. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/resi_dual.png +0 -0
  34. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/residual_attn.png +0 -0
  35. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/rezero.png +0 -0
  36. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/rotary.png +0 -0
  37. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/sandwich-2.png +0 -0
  38. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/sandwich.png +0 -0
  39. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/sandwich_norm.png +0 -0
  40. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/scalenorm.png +0 -0
  41. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/talking-heads.png +0 -0
  42. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/topk-attention.png +0 -0
  43. {x_transformers-2.7.0 → x_transformers-2.7.1}/images/xval.png +0 -0
  44. {x_transformers-2.7.0 → x_transformers-2.7.1}/tests/test_x_transformers.py +0 -0
  45. {x_transformers-2.7.0 → x_transformers-2.7.1}/train_belief_state.py +0 -0
  46. {x_transformers-2.7.0 → x_transformers-2.7.1}/train_copy.py +0 -0
  47. {x_transformers-2.7.0 → x_transformers-2.7.1}/train_entropy_tokenizer.py +0 -0
  48. {x_transformers-2.7.0 → x_transformers-2.7.1}/train_enwik8.py +0 -0
  49. {x_transformers-2.7.0 → x_transformers-2.7.1}/train_length_extrapolate.py +0 -0
  50. {x_transformers-2.7.0 → x_transformers-2.7.1}/train_parity.py +0 -0
  51. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/__init__.py +0 -0
  52. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/attend.py +0 -0
  53. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/autoregressive_wrapper.py +0 -0
  54. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/belief_state_wrapper.py +0 -0
  55. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/dpo.py +0 -0
  56. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/entropy_based_tokenizer.py +0 -0
  57. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/multi_input.py +0 -0
  58. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/neo_mlp.py +0 -0
  59. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/nonautoregressive_wrapper.py +0 -0
  60. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/up_wrapper.py +0 -0
  61. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/x_transformers.py +0 -0
  62. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/xl_autoregressive_wrapper.py +0 -0
  63. {x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/xval.py +0 -0
{x_transformers-2.7.0 → x_transformers-2.7.1}/PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: x-transformers
-Version: 2.7.0
+Version: 2.7.1
 Summary: X-Transformers
 Project-URL: Homepage, https://pypi.org/project/x-transformers/
 Project-URL: Repository, https://github.com/lucidrains/x-transformers
{x_transformers-2.7.0 → x_transformers-2.7.1}/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "x-transformers"
-version = "2.7.0"
+version = "2.7.1"
 description = "X-Transformers"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }
{x_transformers-2.7.0 → x_transformers-2.7.1}/x_transformers/continuous.py
@@ -241,6 +241,7 @@ class ContinuousAutoregressiveWrapper(Module):
         self,
         net: ContinuousTransformerWrapper,
         loss_fn: Module | None = None,
+        use_l1_loss = False,
         equal_loss_weight_batch = False, # setting this to True, if the mask is passed in and sequences are variable in length, each sequence will be weighted the same (as opposed to each token)
     ):
         super().__init__()
@@ -250,7 +251,15 @@ class ContinuousAutoregressiveWrapper(Module):
         probabilistic = net.probabilistic
         self.probabilistic = probabilistic
 
-        loss_fn = default(loss_fn, nn.MSELoss(reduction = 'none') if not probabilistic else GaussianNLL())
+        # default loss function
+
+        if not exists(loss_fn):
+            if probabilistic:
+                loss_fn = GaussianNLL()
+            elif use_l1_loss:
+                loss_fn = nn.L1Loss(reduction = 'none')
+            else:
+                loss_fn = nn.MSELoss(reduction = 'none')
 
         self.loss_fn = loss_fn
         self.equal_loss_weight_batch = equal_loss_weight_batch
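
For context, a minimal usage sketch of the new use_l1_loss flag. The setup mirrors the repository README's continuous-wrapper example; the dimensions and decoder settings below are illustrative, and only the use_l1_loss = True argument is new in 2.7.1.

    import torch
    from x_transformers import ContinuousTransformerWrapper, Decoder
    from x_transformers import ContinuousAutoregressiveWrapper

    # continuous (non-token) transformer, as in the README example
    model = ContinuousTransformerWrapper(
        dim_in = 32,
        dim_out = 32,
        max_seq_len = 1024,
        attn_layers = Decoder(
            dim = 512,
            depth = 6,
            heads = 8
        )
    )

    # new in 2.7.1: when no loss_fn is passed, use_l1_loss = True defaults the
    # loss to nn.L1Loss instead of nn.MSELoss (the probabilistic case still
    # defaults to GaussianNLL, as shown in the diff above)
    wrapper = ContinuousAutoregressiveWrapper(model, use_l1_loss = True)

    # mock continuous data, with an optional mask for variable-length sequences
    x = torch.randn(1, 1024, 32)
    mask = torch.ones(1, 1024).bool()

    loss = wrapper(x, mask = mask)
    loss.backward()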